예제 #1
0
def make_request(request, is_bot_or_admin):
    """Fills in the remaining TaskRequest values and stores the request.

    When request.parent_task_id is set, values are taken from the parent
    request:
    - priority: capped at parent.priority - 1, floored at 0
    - user: overridden by parent.user

    A priority below 100 is reset to 100 unless is_bot_or_admin is true.

    Returns:
      The request that was passed in, after _put_request() was called on it.

    Raises:
      ValueError: the key derived from parent_task_id has no stored entity.
    """
    assert request.__class__ is TaskRequest
    if request.parent_task_id:
        # Walk up from the parent's run result key to its request key.
        parent_key = task_pack.result_summary_key_to_request_key(
            task_pack.run_result_key_to_result_summary_key(
                task_pack.unpack_run_result_key(request.parent_task_id)))
        parent = parent_key.get()
        if not parent:
            raise ValueError("parent_task_id is not a valid task")
        capped = min(request.priority, parent.priority - 1)
        request.priority = max(capped, 0)
        # Replace the submitted user with the parent's.
        request.user = parent.user

    # Only bots and admins may keep a priority below 100; everyone else is
    # silently moved back to 100.
    if request.priority < 100 and not is_bot_or_admin:
        request.priority = 100

    request.authenticated = auth.get_current_identity()
    props = request.properties
    if not (props.is_terminate or props.grace_period_secs is not None):
        props.grace_period_secs = 30
    if props.idempotent is None:
        props.idempotent = False
    _put_request(request)
    return request
예제 #2
0
  def get_request_and_result(self, task_id):
    """Retrieves the TaskRequest for 'task_id' and enforces the ACL.

    Supports both TaskResultSummary (ends with 0) or TaskRunResult (ends with 1
    or 2).

    Aborts with HTTP 404 when task_id cannot be unpacked or when either entity
    is missing, and with HTTP 403 when request.has_access is false.

    Returns:
      tuple(TaskRequest, result): result can be either for a TaskRunResult or a
                                  TaskResultSummary.
    """
    try:
      key = task_pack.unpack_result_summary_key(task_id)
      request_key = task_pack.result_summary_key_to_request_key(key)
    except ValueError:
      # Not a result summary id; try it as a run result id instead.
      try:
        key = task_pack.unpack_run_result_key(task_id)
        request_key = task_pack.result_summary_key_to_request_key(
            task_pack.run_result_key_to_result_summary_key(key))
      except (NotImplementedError, ValueError):
        # unpack_run_result_key() may raise NotImplementedError as well as
        # ValueError; both mean the id cannot be resolved, so report 404
        # rather than letting the exception escape the handler.
        self.abort(404, 'Invalid key format.')
    # Fetch the request and its result in a single batch call.
    request, result = ndb.get_multi((request_key, key))
    if not request or not result:
      self.abort(404, '%s not found.' % key.id())
    if not request.has_access:
      self.abort(403, '%s is not accessible.' % key.id())
    return request, result
예제 #3
0
  def post(self, task_id=None):
    """Handles a task error reported by a bot.

    Records a 'task_error' bot event, reports the failure through ereporter2,
    then asks the scheduler to kill the task on that bot. Sends an empty dict
    as response on success; aborts with HTTP 400 when log_unexpected_keys() or
    task_scheduler.bot_kill_task() returns a message.

    The task_id argument is overwritten by the 'task_id' value from the body.
    """
    payload = self.parse_body()
    bot_id = payload.get('id')
    task_id = payload.get('task_id', '')
    message = payload.get('message', 'unknown')

    bot_management.bot_event(
        event_type='task_error',
        bot_id=bot_id,
        external_ip=self.request.remote_addr,
        dimensions=None,
        state=None,
        version=None,
        quarantined=None,
        task_id=task_id,
        task_name=None,
        message=message)

    report_format = (
        'Bot: https://%s/restricted/bot/%s\n'
        'Task failed: https://%s/user/task/%s\n'
        '%s')
    bot_host = app_identity.get_default_version_hostname()
    task_host = app_identity.get_default_version_hostname()
    report = report_format % (bot_host, bot_id, task_host, task_id, message)
    ereporter2.log_request(self.request, source='bot', message=report)

    # The event and report above are emitted before the body is validated.
    key_error = log_unexpected_keys(
        self.EXPECTED_KEYS, payload, self.request, 'bot', 'keys')
    if key_error:
      self.abort_with_error(400, error=key_error)

    run_result_key = task_pack.unpack_run_result_key(task_id)
    kill_error = task_scheduler.bot_kill_task(run_result_key, bot_id)
    if kill_error:
      logging.error(kill_error)
      self.abort_with_error(400, error=kill_error)
    self.send_response({})
예제 #4
0
    def post(self, task_id=None):
        """Processes a bot's report that a task hit an error.

        A 'task_error' bot event is recorded and the failure is logged via
        ereporter2 first. The body is then checked for unexpected keys and
        the scheduler is asked to kill the task; either step aborts with
        HTTP 400 if it returns a message. An empty dict is sent otherwise.

        The task_id argument is replaced by the body's 'task_id' value.
        """
        body = self.parse_body()
        bot_id = body.get('id')
        task_id = body.get('task_id', '')
        message = body.get('message', 'unknown')

        bot_management.bot_event(
            event_type='task_error', bot_id=bot_id,
            external_ip=self.request.remote_addr,
            dimensions=None, state=None, version=None, quarantined=None,
            task_id=task_id, task_name=None, message=message)

        # One multi-line entry: link to the bot, link to the task, message.
        layout = ('Bot: https://%s/restricted/bot/%s\n'
                  'Task failed: https://%s/user/task/%s\n'
                  '%s')
        values = (
            app_identity.get_default_version_hostname(), bot_id,
            app_identity.get_default_version_hostname(), task_id,
            message,
        )
        ereporter2.log_request(
            self.request, source='bot', message=layout % values)

        unexpected = log_unexpected_keys(
            self.EXPECTED_KEYS, body, self.request, 'bot', 'keys')
        if unexpected:
            self.abort_with_error(400, error=unexpected)

        failure = task_scheduler.bot_kill_task(
            task_pack.unpack_run_result_key(task_id), bot_id)
        if failure:
            logging.error(failure)
            self.abort_with_error(400, error=failure)
        self.send_response({})
예제 #5
0
def init_new_request(request, allow_high_priority):
  """Fills in the derived fields of a new TaskRequest without storing it.

  If parent_task_id is set, values from the parent request are used:
  - priority: capped at parent.priority - 1, floored at 0
  - user: overridden by parent.user

  A priority below 100 is reset to 100 unless allow_high_priority is true or
  the request is a terminate request. The function also sets authenticated,
  the default grace_period_secs and idempotent values, service_account, tags
  and properties_hash.

  Raises:
    ValueError: the key derived from parent_task_id has no stored entity.
    auth.AuthorizationError: service_account_token is set to something other
        than 'none' or 'bot'.
  """
  assert request.__class__ is TaskRequest, request
  if request.parent_task_id:
    # Walk up from the parent's run result key to its request key.
    parent_key = task_pack.result_summary_key_to_request_key(
        task_pack.run_result_key_to_result_summary_key(
            task_pack.unpack_run_result_key(request.parent_task_id)))
    parent = parent_key.get()
    if not parent:
      raise ValueError('parent_task_id is not a valid task')
    capped = min(request.priority, parent.priority - 1)
    request.priority = max(capped, 0)
    # Replace the submitted user with the parent's.
    request.user = parent.user

  props = request.properties

  # Priorities below 100 need allow_high_priority; terminate requests are
  # exempt. Everything else is silently moved back to 100.
  if (request.priority < 100 and not allow_high_priority and
      not props.is_terminate):
    request.priority = 100

  request.authenticated = auth.get_current_identity()
  if not (props.is_terminate or props.grace_period_secs is not None):
    props.grace_period_secs = 30
  if props.idempotent is None:
    props.idempotent = False

  token = request.service_account_token
  request.service_account = 'none'
  if token and token != 'none':
    if token != 'bot':
      # TODO(vadimsh): Check the token signature, verify it can be used by the
      # current user, extract service account email.
      raise auth.AuthorizationError('service_account_token is not implemented')
    request.service_account = 'bot'

  # Add the derived tags, then dedupe and sort the whole list.
  request.tags.extend([
      'priority:%s' % request.priority,
      'user:%s' % request.user,
      'service_account:%s' % request.service_account,
  ])
  request.tags.extend(
      '%s:%s' % (name, value)
      for name, value in props.dimensions.iteritems())
  request.tags = sorted(set(request.tags))

  # Only idempotent requests carry a hash of their properties.
  digest = None
  if props.idempotent:
    digest = request.HASHING_ALGO(utils.encode_to_json(props)).digest()
  request.properties_hash = digest
예제 #6
0
    def run_result_key(self):
        """Returns the run result key for this entity, or None.

        When deduped_from is set, the key is unpacked from it, i.e. it is the
        run result of the original task. Otherwise the key is derived from
        self.key and try_number; None is returned when try_number is unset.
        """
        original_id = self.deduped_from
        if original_id:
            return task_pack.unpack_run_result_key(original_id)
        if self.try_number:
            return task_pack.result_summary_key_to_run_result_key(
                self.key, self.try_number)
        return None
예제 #7
0
    def run_result_key(self):
        """Returns the key of the associated run result, if there is one.

        A deduped entity resolves to the run result unpacked from
        deduped_from. Any other entity resolves to the key built from
        self.key and try_number, or to None while try_number is not set.
        """
        if not self.deduped_from:
            if not self.try_number:
                return None
            return task_pack.result_summary_key_to_run_result_key(
                self.key, self.try_number)
        # Deduped: point at the run results of the original task.
        return task_pack.unpack_run_result_key(self.deduped_from)
예제 #8
0
    def test_unpack_run_result_key(self):
        # Valid ids: the last character selects the TaskRunResult id.
        for suffix in ('1', '2'):
            expected = ndb.Key(
                'TaskRequest', 0x7fffffffff447fde,
                'TaskResultSummary', 1,
                'TaskRunResult', int(suffix))
            self.assertEqual(
                expected, task_pack.unpack_run_result_key('bb8021' + suffix))

        # Every one of these ids must be rejected with ValueError.
        for malformed in ('1', 'g', 'bb80200', 'bb80203'):
            with self.assertRaises(ValueError):
                task_pack.unpack_run_result_key(malformed)
예제 #9
0
    def test_unpack_run_result_key(self):
        # New style key.
        for suffix in ('1', '2'):
            expected = ndb.Key(
                'TaskRequest', 0x7fffffffff447fde,
                'TaskResultSummary', 1,
                'TaskRunResult', int(suffix))
            self.assertEqual(
                expected, task_pack.unpack_run_result_key('bb8021' + suffix))
        # Old style key.
        for suffix in ('1', '2'):
            expected = ndb.Key(
                'TaskRequestShard', '6f4236',
                'TaskRequest', 196608512,
                'TaskResultSummary', 1,
                'TaskRunResult', int(suffix))
            self.assertEqual(
                expected, task_pack.unpack_run_result_key('bb8020' + suffix))

        # These ids are rejected with ValueError.
        for malformed in ('1', 'g', 'bb80200'):
            with self.assertRaises(ValueError):
                task_pack.unpack_run_result_key(malformed)
        # This id is rejected with NotImplementedError instead.
        with self.assertRaises(NotImplementedError):
            task_pack.unpack_run_result_key('bb80203')
예제 #10
0
def get_result_key(task_id):
  """Unpacks a task ID into its key and the matching result summary key.

  task_id is first tried as a packed result summary key, then as a packed run
  result key.

  Returns:
    tuple(key, summary_key). Both items are the same key when task_id unpacks
    as a result summary key.

  Raises:
    endpoints.BadRequestException: task_id could not be unpacked either way.
  """
  try:
    summary_key = task_pack.unpack_result_summary_key(task_id)
    return summary_key, summary_key
  except ValueError:
    try:
      run_key = task_pack.unpack_run_result_key(task_id)
      return run_key, task_pack.run_result_key_to_result_summary_key(run_key)
    except ValueError:
      raise endpoints.BadRequestException(
          'Task ID %s produces an invalid key.' % task_id)
예제 #11
0
 def get_result_key(self, task_id):
   """Unpacks task_id into (key, summary_key).

   task_id is tried as a packed result summary key first, then as a packed
   run result key. abort_with_error(400) is called when neither works.
   """
   # TODO(maruel): Users can only request their own task. Privileged users can
   # request any task.
   result_key = summary_key = None
   try:
     result_key = task_pack.unpack_result_summary_key(task_id)
   except ValueError:
     try:
       result_key = task_pack.unpack_run_result_key(task_id)
       summary_key = task_pack.run_result_key_to_result_summary_key(
           result_key)
     except ValueError:
       self.abort_with_error(400, error='Invalid key')
   else:
     # The id names a result summary, so both keys are the same.
     summary_key = result_key
   return result_key, summary_key
예제 #12
0
def get_result_key(task_id):
    """Returns (key, summary_key) for a packed task ID.

    The ID is tried as a result summary key first, in which case both items
    of the tuple are that key. Otherwise it is tried as a run result key and
    the summary key is derived from it.

    Raises:
      endpoints.BadRequestException: the ID could not be unpacked either way.
    """
    try:
        key = task_pack.unpack_result_summary_key(task_id)
    except ValueError:
        try:
            key = task_pack.unpack_run_result_key(task_id)
            summary_key = task_pack.run_result_key_to_result_summary_key(key)
        except ValueError:
            raise endpoints.BadRequestException(
                'Task ID %s produces an invalid key.' % task_id)
    else:
        summary_key = key
    return key, summary_key
예제 #13
0
  def test_unpack_run_result_key(self):
    # Each entry pairs a packed id prefix with the key path that precedes the
    # final ('TaskRunResult', <id>) pair.
    flavors = (
      # New style key.
      ('bb8021', ('TaskRequest', 0x7fffffffff447fde, 'TaskResultSummary', 1)),
      # Old style key.
      ('bb8020',
       ('TaskRequestShard', '6f4236', 'TaskRequest', 196608512,
        'TaskResultSummary', 1)),
    )
    for prefix, ancestors in flavors:
      for i in ('1', '2'):
        expected = ndb.Key(*(ancestors + ('TaskRunResult', int(i))))
        self.assertEqual(
            expected, task_pack.unpack_run_result_key(prefix + i))

    # Ids that must be rejected, with the exception type expected for each.
    rejected = (
      ('1', ValueError),
      ('g', ValueError),
      ('bb80200', ValueError),
      ('bb80203', NotImplementedError),
    )
    for packed, error_type in rejected:
      with self.assertRaises(error_type):
        task_pack.unpack_run_result_key(packed)
예제 #14
0
 def get_result_key(self, task_id):
   """Returns (key, summary_key) for a packed task id.

   The id is tried as a result summary key first and as a run result key
   second; abort_with_error(400) is called if both attempts raise ValueError.
   """
   # TODO(maruel): Users can only request their own task. Privileged users can
   # request any task.
   unpacked, summary = None, None
   try:
     unpacked = task_pack.unpack_result_summary_key(task_id)
     summary = unpacked
   except ValueError:
     # Not a result summary id; fall back to the run result format.
     try:
       unpacked = task_pack.unpack_run_result_key(task_id)
       summary = task_pack.run_result_key_to_result_summary_key(unpacked)
     except ValueError:
       self.abort_with_error(400, error='Invalid key')
   return unpacked, summary
예제 #15
0
  def test_task_parent_isolated(self):
    # Schedule a parent task that uses inputs_ref and no commands.
    parent_properties = {
      'commands': None,
      'dimensions': {u'OS': u'Windows-3.1.1'},
      'inputs_ref': {
        'isolated': '1' * 40,
        'isolatedserver': 'http://localhost:1',
        'namespace': 'default-gzip',
      },
    }
    request = task_request.make_request(
        _gen_request(properties=parent_properties), True)
    task_scheduler.schedule_request(request)

    # A bot reaps the parent task.
    reaped_request, run_result = task_scheduler.bot_reap_task(
        {
          u'OS': [u'Windows', u'Windows-3.1.1'],
          u'hostname': u'localhost',
          u'foo': u'bar',
        },
        'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    self.assertEqual('localhost', run_result.bot_id)
    pending = task_to_run.TaskToRun.query().get()
    self.assertEqual(None, pending.queue_number)
    # It's important to terminate the task with success.
    update_outcome = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), update_outcome)

    # Schedule a child task that names the finished run as its parent.
    parent_id = run_result.task_id
    child_data = _gen_request(
        parent_task_id=parent_id,
        properties={'dimensions': {u'OS': u'Windows-3.1.1'}})
    child_request = task_request.make_request(child_data, True)
    result_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], result_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # Both parent entities must now list the child.
    run_key = task_pack.unpack_run_result_key(parent_id)
    summary_key = task_pack.run_result_key_to_result_summary_key(run_key)
    expected = [result_summary.task_id]
    for parent_key in (run_key, summary_key):
      self.assertEqual(expected, parent_key.get().children_task_ids)
예제 #16
0
  def test_task_parent_isolated(self):
    # Parent task: described through inputs_ref, with commands left unset.
    inputs_ref = {
      'isolated': '1' * 40,
      'isolatedserver': 'http://localhost:1',
      'namespace': 'default-gzip',
    }
    parent_data = _gen_request(
        properties={
          'commands': None,
          'dimensions': {u'OS': u'Windows-3.1.1'},
          'inputs_ref': inputs_ref,
        })
    parent_request = task_request.make_request(parent_data, True)
    task_scheduler.schedule_request(parent_request)

    # Let a bot with matching dimensions pick the parent up.
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    actual_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(parent_request, actual_request)
    self.assertEqual('localhost', run_result.bot_id)
    self.assertEqual(None, task_to_run.TaskToRun.query().get().queue_number)
    # It's important to terminate the task with success.
    update_args = (
        run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
        None)
    self.assertEqual(
        (True, True), task_scheduler.bot_update_task(*update_args))

    # Child task: points back at the parent's run result id.
    parent_id = run_result.task_id
    child_request = task_request.make_request(
        _gen_request(
            parent_task_id=parent_id,
            properties={'dimensions': {u'OS': u'Windows-3.1.1'}}),
        True)
    child_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # The child id must be recorded on the parent's run result and summary.
    parent_run_key = task_pack.unpack_run_result_key(parent_id)
    parent_summary_key = task_pack.run_result_key_to_result_summary_key(
        parent_run_key)
    children = [child_summary.task_id]
    self.assertEqual(children, parent_run_key.get().children_task_ids)
    self.assertEqual(children, parent_summary_key.get().children_task_ids)
예제 #17
0
  def test_task_parent_children(self):
    # Parent task creates a child task.
    parent_id = self._task_ran_successfully()
    child_request = task_request.make_request(
        _gen_request(
            parent_task_id=parent_id,
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
        True)
    child_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # Both the parent's run result and its summary must list the child.
    run_key = task_pack.unpack_run_result_key(parent_id)
    summary_key = task_pack.run_result_key_to_result_summary_key(run_key)
    expected = [child_summary.task_id]
    for parent_key in (run_key, summary_key):
      self.assertEqual(expected, parent_key.get().children_task_ids)
예제 #18
0
    def post(self, task_id=None):
        """Handles a task error reported by a bot.

        Validates the bot id with bot_auth, records a 'task_error' bot event,
        reports the failure through ereporter2, then asks the scheduler to
        kill the task. Sends an empty dict on success; aborts with HTTP 400
        when log_unexpected_keys() or task_scheduler.bot_kill_task() returns
        a message.

        The task_id argument is overwritten by the body's 'task_id' value.
        """
        payload = self.parse_body()
        bot_id = payload.get('id')
        task_id = payload.get('task_id', '')
        message = payload.get('message', 'unknown')

        # machine_type stays None when there is no stored info for this bot.
        bot_info = bot_management.get_info_key(bot_id).get()
        machine_type = bot_info.machine_type if bot_info else None

        # Make sure bot self-reported ID matches the authentication token.
        # Raises auth.AuthorizationError if not.
        bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

        bot_management.bot_event(
            event_type='task_error', bot_id=bot_id,
            external_ip=self.request.remote_addr,
            authenticated_as=auth.get_peer_identity().to_bytes(),
            dimensions=None, state=None, version=None, quarantined=None,
            maintenance_msg=None,
            task_id=task_id, task_name=None, message=message)

        report_format = ('Bot: https://%s/restricted/bot/%s\n'
                         'Task failed: https://%s/user/task/%s\n'
                         '%s')
        bot_host = app_identity.get_default_version_hostname()
        task_host = app_identity.get_default_version_hostname()
        report = report_format % (
            bot_host, bot_id, task_host, task_id, message)
        ereporter2.log_request(self.request, source='bot', message=report)

        key_error = log_unexpected_keys(
            self.EXPECTED_KEYS, payload, self.request, 'bot', 'keys')
        if key_error:
            self.abort_with_error(400, error=key_error)

        run_result_key = task_pack.unpack_run_result_key(task_id)
        kill_error = task_scheduler.bot_kill_task(run_result_key, bot_id)
        if kill_error:
            logging.error(kill_error)
            self.abort_with_error(400, error=kill_error)
        self.send_response({})
예제 #19
0
  def test_task_parent_children(self):
    # Parent task creates a child task.
    parent_id = self._task_ran_successfully()
    child_properties = dict(dimensions={u'OS': u'Windows-3.1.1'})
    child_data = _gen_request(
        parent_task_id=parent_id, properties=child_properties)
    child_request = task_request.make_request(child_data, True)
    child_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # The child id must show up on the parent's run result and its summary.
    parent_run_key = task_pack.unpack_run_result_key(parent_id)
    parent_summary_key = task_pack.run_result_key_to_result_summary_key(
        parent_run_key)
    children = [child_summary.task_id]
    self.assertEqual(children, parent_run_key.get().children_task_ids)
    self.assertEqual(children, parent_summary_key.get().children_task_ids)
예제 #20
0
  def post(self, task_id):
    """Schedules a clone of the request behind task_id and redirects to it.

    task_id may be a packed result summary key or a packed run result key.
    Aborts with HTTP 404 when it cannot be unpacked or when the request
    entity is missing.
    """
    try:
      summary_key = task_pack.unpack_result_summary_key(task_id)
      request_key = task_pack.result_summary_key_to_request_key(summary_key)
    except ValueError:
      # Not a result summary id; try the run result format.
      try:
        run_key = task_pack.unpack_run_result_key(task_id)
        request_key = task_pack.result_summary_key_to_request_key(
            task_pack.run_result_key_to_result_summary_key(run_key))
      except (NotImplementedError, ValueError):
        self.abort(404, 'Invalid key format.')

    # Retrying a task is essentially reusing the same task request as the
    # original one, but with new parameters.
    source_request = request_key.get()
    if not source_request:
      self.abort(404, 'Invalid request key.')
    clone = task_request.make_request_clone(source_request)
    new_summary = task_scheduler.schedule_request(clone)
    self.redirect('/user/task/%s' % new_summary.task_id)
예제 #21
0
  def post(self, task_id):
    """Retries a task: clones its request, schedules it, then redirects.

    Accepts either a packed result summary key or a packed run result key.
    Responds with HTTP 404 for an id that cannot be unpacked or whose request
    entity does not exist.
    """
    try:
      request_key = task_pack.result_summary_key_to_request_key(
          task_pack.unpack_result_summary_key(task_id))
    except ValueError:
      try:
        request_key = task_pack.result_summary_key_to_request_key(
            task_pack.run_result_key_to_result_summary_key(
                task_pack.unpack_run_result_key(task_id)))
      except (NotImplementedError, ValueError):
        self.abort(404, 'Invalid key format.')

    # Retrying a task is essentially reusing the same task request as the
    # original one, but with new parameters.
    original_request = request_key.get()
    if not original_request:
      self.abort(404, 'Invalid request key.')
    retried = task_scheduler.schedule_request(
        task_request.make_request_clone(original_request))
    self.redirect('/user/task/%s' % retried.task_id)
예제 #22
0
def make_request(request, is_bot_or_admin):
    """Completes a TaskRequest and registers it through _put_request().

    If parent_task_id is set, values from the parent request are used:
    - priority: capped at parent.priority - 1, floored at 0
    - user: overridden by parent.user

    Callers for which is_bot_or_admin is false cannot keep a priority below
    100; it is reset to 100.

    Returns:
      The request object that was passed in.

    Raises:
      ValueError: the key derived from parent_task_id has no stored entity.
    """
    assert request.__class__ is TaskRequest
    parent_id = request.parent_task_id
    if parent_id:
        parent_run_key = task_pack.unpack_run_result_key(parent_id)
        parent_summary_key = task_pack.run_result_key_to_result_summary_key(
            parent_run_key)
        parent = task_pack.result_summary_key_to_request_key(
            parent_summary_key).get()
        if not parent:
            raise ValueError('parent_task_id is not a valid task')
        request.priority = max(
            min(request.priority, parent.priority - 1), 0)
        # Replace the submitted user with the parent's.
        request.user = parent.user

    # Priorities below 100 are only kept for bots and admins; other callers
    # are silently moved back to 100.
    if request.priority < 100 and not is_bot_or_admin:
        request.priority = 100

    request.authenticated = auth.get_current_identity()
    props = request.properties
    # Terminate requests keep grace_period_secs as is; others default to 30.
    if not props.is_terminate:
        if props.grace_period_secs is None:
            props.grace_period_secs = 30
    if props.idempotent is None:
        props.idempotent = False
    _put_request(request)
    return request
예제 #23
0
 def task(self):
     """Returns the key unpacked from self.task_id, or None when it is unset."""
     packed_id = self.task_id
     return task_pack.unpack_run_result_key(packed_id) if packed_id else None
예제 #24
0
  def get(self, task_id):
    """Renders the 'swarming/user_task.html' page for task_id.

    Loads the request and its result through get_request_and_result(), then
    gathers the related entities (parent task, children tasks, the bot, and
    the tasks that started just before and after on the same bot) and passes
    them to the template.
    """
    request, result = self.get_request_and_result(task_id)
    # Start every datastore fetch asynchronously before waiting on any of
    # them; the results are only collected further down.
    parent_task_future = None
    if request.parent_task_id:
      parent_key = task_pack.unpack_run_result_key(request.parent_task_id)
      parent_task_future = parent_key.get_async()
    children_tasks_futures = [
      task_pack.unpack_result_summary_key(c).get_async()
      for c in result.children_task_ids
    ]

    bot_id = result.bot_id
    following_task_future = None
    previous_task_future = None
    # The neighbouring tasks are only looked up once this task has started.
    if result.started_ts:
      # Use a shortcut name because it becomes unwieldy otherwise.
      cls = task_result.TaskRunResult

      # Note that the links will be to the TaskRunResult, not to
      # TaskResultSummary.
      # First run on the same bot that started after this one.
      following_task_future = cls.query(
          cls.bot_id == bot_id,
          cls.started_ts > result.started_ts,
          ).order(cls.started_ts).get_async()
      # Last run on the same bot that started before this one.
      previous_task_future = cls.query(
          cls.bot_id == bot_id,
          cls.started_ts < result.started_ts,
          ).order(-cls.started_ts).get_async()

    bot_future = (
        bot_management.get_info_key(bot_id).get_async() if bot_id else None)

    # Collect the results of the fetches started above.
    following_task = None
    if following_task_future:
      following_task = following_task_future.get_result()

    previous_task = None
    if previous_task_future:
      previous_task = previous_task_future.get_result()

    parent_task = None
    if parent_task_future:
      parent_task = parent_task_future.get_result()
    children_tasks = [c.get_result() for c in children_tasks_futures]

    # CIPD packages requested by the task, grouped by path.
    cipd = None
    if request.properties.cipd_input:
      cipd = {
        'server': request.properties.cipd_input.server,
        'client_package': request.properties.cipd_input.client_package,
        'packages': self.packages_grouped_by_path(
            request.properties.cipd_input.packages),
      }

    # CIPD pins recorded on the result, grouped by path.
    cipd_pins = None
    if result.cipd_pins:
      cipd_pins = {
        'client_package': result.cipd_pins.client_package,
        'packages': self.packages_grouped_by_path(result.cipd_pins.packages),
      }

    # Values handed to the template.
    params = {
      'bot': bot_future.get_result() if bot_future else None,
      'children_tasks': children_tasks,
      'cipd': cipd,
      'cipd_pins': cipd_pins,
      'is_admin': acl.is_admin(),
      'is_gae_admin': users.is_current_user_admin(),
      'is_privileged_user': acl.is_privileged_user(),
      'following_task': following_task,
      'full_appid': os.environ['APPLICATION_ID'],
      'host_url': self.request.host_url,
      'is_running': result.state == task_result.State.RUNNING,
      'parent_task': parent_task,
      'previous_task': previous_task,
      'request': request,
      'task': result,
      'try_link': '/task?id=%s' % task_id,
      'xsrf_token': self.generate_xsrf_token(),
    }
    self.response.write(template.render('swarming/user_task.html', params))
예제 #25
0
def schedule_request(request, secret_bytes):
  """Creates and stores all the entities to schedule a new task request.

  Assumes ACL check has already happened (see 'check_schedule_request_acl').

  The number of entities created is ~4: TaskRequest, TaskToRun and
  TaskResultSummary and (optionally) SecretBytes. They are in single entity
  group and saved in a single transaction.

  If an idempotent task slice matches a previous task, the result summary is
  deduped and neither TaskToRun nor SecretBytes is stored. If no task slice
  has capacity, the result summary is stored with state NO_RESOURCE.

  Arguments:
  - request: TaskRequest entity to be saved in the DB. Its key must not be set
             and the entity must not be saved in the DB yet.
  - secret_bytes: SecretBytes entity to be saved in the DB. Its key will be set
             and the entity will be stored by this function. None is allowed if
             there are no SecretBytes for this task.

  Returns:
    TaskResultSummary. TaskToRun is not returned.
  """
  assert isinstance(request, task_request.TaskRequest), request
  assert not request.key, request.key

  # This does a DB GET, occasionally triggers a task queue. May throw, which is
  # surfaced to the user but it is safe as the task request wasn't stored yet.
  task_queues.assert_task(request)

  now = utils.utcnow()
  request.key = task_request.new_request_key()
  result_summary = task_result.new_result_summary(request)
  result_summary.modified_ts = now
  to_run = None
  if secret_bytes:
    secret_bytes.key = request.secret_bytes_key

  # Look for a previous task to dedupe against, trying each idempotent task
  # slice in order and stopping at the first match.
  dupe_summary = None
  for i in xrange(request.num_task_slices):
    t = request.task_slice(i)
    if t.properties.idempotent:
      dupe_summary = _find_dupe_task(now, t.properties_hash())
      if dupe_summary:
        _dedupe_result_summary(dupe_summary, result_summary, i)
        # In this code path, there's not much to do as the task will not be run,
        # previous results are returned. We still need to store the TaskRequest
        # and TaskResultSummary.
        # Since the task is never scheduled, TaskToRun is not stored.
        # Since the has_secret_bytes property is already set for UI purposes,
        # and the task itself will never be run, we skip storing the
        # SecretBytes, as they would never be read and will just consume space
        # in the datastore (and the task we deduplicated with will have them
        # stored anyway, if we really want to get them again).
        secret_bytes = None
        break

  if not dupe_summary:
    # The task has to run. Make sure there's capacity.
    index = 0
    while index < request.num_task_slices:
      # This needs to be extremely fast.
      to_run = task_to_run.new_task_to_run(request, 1, index)
      if _has_capacity(request.task_slice(index).properties.dimensions):
        # It's pending at this index now.
        result_summary.current_task_slice = index
        break
      index += 1

    # The loop ran past the last slice without finding capacity.
    if index == request.num_task_slices:
      # Skip to_run since it's not enqueued.
      to_run = None
      # Same rationale as deduped task.
      secret_bytes = None
      # Instantaneously denied.
      result_summary.abandoned_ts = result_summary.created_ts
      result_summary.state = task_result.State.NO_RESOURCE

  # Storing these entities makes this task live. It is important at this point
  # that the HTTP handler returns as fast as possible, otherwise the task will
  # be run but the client will not know about it.
  _gen_key = lambda: _gen_new_keys(result_summary, to_run, secret_bytes)
  # Entities that were set to None above are left out of the insert.
  extra = filter(bool, [result_summary, to_run, secret_bytes])
  datastore_utils.insert(request, new_key_callback=_gen_key, extra=extra)
  if dupe_summary:
    logging.debug(
        'New request %s reusing %s', result_summary.task_id,
        dupe_summary.task_id)
  elif result_summary.state == task_result.State.NO_RESOURCE:
    logging.warning(
        'New request %s denied with NO_RESOURCE', result_summary.task_id)
    # NOTE(review): this debug line repeats the task id just logged at warning
    # level; confirm whether the duplicate is intentional.
    logging.debug('New request %s', result_summary.task_id)
  else:
    logging.debug('New request %s', result_summary.task_id)

  # Get parent task details if applicable.
  if request.parent_task_id:
    parent_run_key = task_pack.unpack_run_result_key(request.parent_task_id)
    parent_task_keys = [
      parent_run_key,
      task_pack.run_result_key_to_result_summary_key(parent_run_key),
    ]

    def run_parent():
      # This one is slower.
      # Appends the new task id to both parent entities and stores them.
      # NOTE(review): assumes both parent entities exist; a missing entity
      # would presumably come back as None from get_multi() - verify.
      items = ndb.get_multi(parent_task_keys)
      k = result_summary.task_id
      for item in items:
        item.children_task_ids.append(k)
        item.modified_ts = now
      ndb.put_multi(items)

    # Raising will abort to the caller. There's a risk that for tasks with
    # parent tasks, the task will be lost due to this transaction.
    # TODO(maruel): An option is to update the parent task as part of a cron
    # job, which would remove this code from the critical path.
    datastore_utils.transaction(run_parent)

  ts_mon_metrics.on_task_requested(result_summary, bool(dupe_summary))
  return result_summary
예제 #26
0
def schedule_request(request, check_acls=True):
    """Creates and stores all the entities to schedule a new task request.

    Checks ACLs first (when check_acls is set). Raises auth.AuthorizationError
    if the caller is not authorized to post this request.

    The number of entities created is 3: TaskRequest, TaskToRun and
    TaskResultSummary.

    All 3 entities in the same entity group (TaskRequest, TaskToRun,
    TaskResultSummary) are saved as a DB transaction.

    If the request is idempotent and _find_dupe_task() returns a previous
    result, the new TaskResultSummary is filled from that result and the
    TaskToRun is taken out of the scheduling queue, so the task is not run
    again.

    If request.parent_task_id is set, the parent TaskRunResult and
    TaskResultSummary get the new task id appended to their children_task_ids
    in a second transaction.

    Arguments:
    - request: TaskRequest entity to be saved in the DB. Its key must not be set
               and the entity must not be saved in the DB yet.
    - check_acls: Whether the request should check ACLs.

    Returns:
      TaskResultSummary. TaskToRun is not returned.
    """
    assert isinstance(request, task_request.TaskRequest), request
    assert not request.key, request.key

    # Raises AuthorizationError with helpful message if the request.authorized
    # can't use some of the requested dimensions.
    if check_acls:
        _check_dimension_acls(request)

    # A single timestamp is shared by the result summary and, further down, by
    # the parent task entities.
    now = utils.utcnow()
    request.key = task_request.new_request_key()
    task = task_to_run.new_task_to_run(request)
    result_summary = task_result.new_result_summary(request)
    result_summary.modified_ts = now

    def get_new_keys():
        # Passed to datastore_utils.insert() as the new-key callback; judging by
        # the log message it is presumably called when the request key already
        # exists -- TODO confirm against datastore_utils.insert.
        # Warning: this assumes knowledge about the hierarchy of each entity.
        # NOTE(review): assigning 'parent' on an existing key / entity may not
        # actually re-parent them -- verify that the new key is really used by
        # 'task' and 'result_summary' after a conflict.
        key = task_request.new_request_key()
        task.key.parent = key
        old = result_summary.task_id
        result_summary.parent = key
        logging.info('%s conflicted, using %s', old, result_summary.task_id)
        return key

    deduped = False
    if request.properties.idempotent:
        dupe_summary = _find_dupe_task(now, request.properties_hash)
        if dupe_summary:
            # Setting task.queue_number to None removes it from the scheduling.
            task.queue_number = None
            # The tuple presumably lists the properties that must NOT be copied
            # from the previous result -- TODO confirm against _copy_summary.
            _copy_summary(
                dupe_summary, result_summary,
                ('created_ts', 'modified_ts', 'name', 'user', 'tags'))
            # Zap irrelevant properties. PerformanceStats is also not copied over,
            # since it's not relevant.
            result_summary.properties_hash = None
            result_summary.try_number = 0
            # The cost of the previous run is accounted as savings for this one.
            result_summary.cost_saved_usd = result_summary.cost_usd
            # Only zap after.
            result_summary.costs_usd = []
            result_summary.deduped_from = task_pack.pack_run_result_key(
                dupe_summary.run_result_key)
            # In this code path, there's not much to do as the task will not be run,
            # previous results are returned. We still need to store all the entities
            # correctly.
            datastore_utils.insert(request,
                                   get_new_keys,
                                   extra=[task, result_summary])
            logging.debug('New request %s reusing %s', result_summary.task_id,
                          dupe_summary.task_id)
            deduped = True

    if not deduped:
        # Storing these entities makes this task live. It is important at this point
        # that the HTTP handler returns as fast as possible, otherwise the task will
        # be run but the client will not know about it.
        datastore_utils.insert(request,
                               get_new_keys,
                               extra=[task, result_summary])
        logging.debug('New request %s', result_summary.task_id)

    # Get parent task details if applicable.
    if request.parent_task_id:
        parent_run_key = task_pack.unpack_run_result_key(
            request.parent_task_id)
        # Both the parent's TaskRunResult and its TaskResultSummary are updated.
        parent_task_keys = [
            parent_run_key,
            task_pack.run_result_key_to_result_summary_key(parent_run_key),
        ]

        def run_parent():
            # This one is slower.
            items = ndb.get_multi(parent_task_keys)
            k = result_summary.task_id
            for item in items:
                item.children_task_ids.append(k)
                item.modified_ts = now
            ndb.put_multi(items)

        # Raising will abort to the caller. There's a risk that for tasks with
        # parent tasks, the task will be lost due to this transaction.
        # TODO(maruel): An option is to update the parent task as part of a cron
        # job, which would remove this code from the critical path.
        datastore_utils.transaction(run_parent)

    # Bookkeeping only; runs for deduped and non-deduped requests alike.
    stats.add_task_entry('task_enqueued',
                         result_summary.key,
                         dimensions=request.properties.dimensions,
                         user=request.user)
    ts_mon_metrics.update_jobs_requested_metrics(result_summary, deduped)
    return result_summary
예제 #27
0
def _validate_task_run_id(_prop, value):
  """Validates a task_id looks valid without fetching the entity."""
  if not value:
    return None
  task_pack.unpack_run_result_key(value)
  return value
예제 #28
0
def make_request(data):
  """Builds a TaskRequest out of an API-style dict and hands it to _put_request.

  Argument:
  - data: dict with:
    - name
    - parent_task_id*
    - properties
      - commands
      - data
      - dimensions
      - env
      - execution_timeout_secs
      - grace_period_secs*
      - idempotent*
      - io_timeout_secs
    - priority
    - scheduling_expiration_secs
    - tags
    - user

  Entries marked with * are optional.

  When parent_task_id is set, the parent request is loaded and:
  - priority: capped to parent.priority - 1, and never below 0
  - user: overridden by parent.user

  Returns:
    The newly created TaskRequest, after _put_request() was called on it.

  Raises:
    ValueError: parent_task_id does not resolve to an existing entity.
    datastore_errors.BadValueError: more than one command was supplied.
  """
  # Refuse typos and unexpected values before touching anything else.
  _assert_keys(_EXPECTED_DATA_KEYS, _REQUIRED_DATA_KEYS, data, 'request keys')
  props = data['properties']
  _assert_keys(
      _EXPECTED_PROPERTIES_KEYS, _REQUIRED_PROPERTIES_KEYS, props,
      'request properties keys')

  parent_task_id = data.get('parent_task_id') or None
  if parent_task_id:
    # Work on a shallow copy so the caller's dict is not modified.
    data = data.copy()
    # Packed run result id -> run result key -> result summary key -> request
    # key.
    parent_request_key = task_pack.result_summary_key_to_request_key(
        task_pack.run_result_key_to_result_summary_key(
            task_pack.unpack_run_result_key(parent_task_id)))
    parent = parent_request_key.get()
    if not parent:
      raise ValueError('parent_task_id is not a valid task')
    capped_priority = min(data['priority'], parent.priority - 1)
    data['priority'] = max(capped_priority, 0)
    # The parent's user replaces whatever was requested.
    data['user'] = parent.user

  # This can't be a validator yet, as previously stored task requests could no
  # longer be loaded.
  commands = props.get('commands') or []
  if len(commands) > 1:
    raise datastore_errors.BadValueError('Only one command is supported')

  # Class TaskProperties takes care of making everything deterministic.
  properties = TaskProperties(
      commands=props['commands'],
      data=props['data'],
      dimensions=props['dimensions'],
      env=props['env'],
      execution_timeout_secs=props['execution_timeout_secs'],
      grace_period_secs=props.get('grace_period_secs', 30),
      idempotent=props.get('idempotent', False),
      io_timeout_secs=props['io_timeout_secs'])

  now = utils.utcnow()
  lifetime = datetime.timedelta(seconds=data['scheduling_expiration_secs'])

  request = TaskRequest(
      authenticated=auth.get_current_identity(),
      created_ts=now,
      expiration_ts=now + lifetime,
      name=data['name'],
      parent_task_id=parent_task_id,
      priority=data['priority'],
      properties=properties,
      tags=data['tags'],
      user=data['user'] or '')
  _put_request(request)
  return request
예제 #29
0
    def post(self, task_id=None):
        """Handles a task update sent by a bot.

        The JSON body is checked against self.ACCEPTED_KEYS and
        self.REQUIRED_KEYS; 'id', 'cost_usd' and 'task_id' are read
        unconditionally, every other value is optional. The update is applied
        through task_scheduler.bot_update_task() and a bot event
        ('task_completed' or 'task_update') is recorded.

        Arguments:
        - task_id: ignored; it is overwritten with the 'task_id' value found in
                   the request body.

        Replies with {'ok': True} on success. Errors are reported through
        self.abort_with_error(): 400 for unexpected keys or a ValueError raised
        while updating, 500 when the update must be retried or on any other
        exception.
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS,
                                         self.REQUIRED_KEYS, request,
                                         self.request, 'bot', 'keys')
        if msg:
            self.abort_with_error(400, error=msg)

        # Mandatory values; a missing key raises KeyError here.
        bot_id = request['id']
        cost_usd = request['cost_usd']
        task_id = request['task_id']

        # Optional values; None when absent.
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # NOTE(review): this runs outside of the try block below, so an invalid
        # task_id is not turned into a HTTP 400 by the ValueError handler.
        run_result_key = task_pack.unpack_run_result_key(task_id)
        if output is not None:
            # The output is sent base64 encoded.
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                # Fall back to the raw value, with non-ascii characters replaced.
                logging.error('Failed to decode output\n%s\n%r', e, output)
                output = output.encode('ascii', 'replace')
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', e, output)

        try:
            success, completed = task_scheduler.bot_update_task(
                run_result_key, bot_id, output, output_chunk_start, exit_code,
                duration, hard_timeout, io_timeout, cost_usd, outputs_ref)
            if not success:
                logging.info('Failed to update, please retry')
                self.abort_with_error(500,
                                      error='Failed to update, please retry')

            action = 'task_completed' if completed else 'task_update'
            bot_management.bot_event(event_type=action,
                                     bot_id=bot_id,
                                     external_ip=self.request.remote_addr,
                                     dimensions=None,
                                     state=None,
                                     version=None,
                                     quarantined=None,
                                     task_id=task_id,
                                     task_name=None)
        except ValueError as e:
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' % e)
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # Presumably what abort_with_error() raises; let it through so the
            # generic handler below does not convert it into another 500.
            raise
        except Exception as e:
            logging.exception('Internal error: %s', e)
            self.abort_with_error(500, error=str(e))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({'ok': True})
예제 #30
0
def get_task_account_token(task_id, bot_id, scopes):
    """Mints an access token for the service account tied to a task.

    Authorization checks are assumed to have been done by the caller.

    Three outcomes are possible, depending on TaskRequest.service_account:
    - 'none': the task uses no service account; ('none', None) is returned.
    - 'bot': the task uses whatever the bot itself uses when calling Swarming;
      ('bot', None) is returned.
    - an email: a token is obtained through the grant stored on the request
      and (<email>, AccessToken) is returned.

    Args:
      task_id: ID of the task.
      bot_id: ID of the bot that executes the task, for logs.
      scopes: list of requested OAuth scopes.

    Returns:
      (<service account email> or 'bot' or 'none', AccessToken or None).

    Raises:
      PermissionError if the token server forbids the usage.
      MisconfigurationError if the service account is misconfigured.
      InternalError if the RPC fails unexpectedly.
    """
    # Walk from the packed run result id up to the key of its TaskRequest.
    try:
        run_result_key = task_pack.unpack_run_result_key(task_id)
        summary_key = task_pack.run_result_key_to_result_summary_key(
            run_result_key)
        request_key = task_pack.result_summary_key_to_request_key(summary_key)
    except ValueError as err:
        logging.error('Unexpectedly bad task_id: %s', err)
        raise MisconfigurationError('Bad task_id: %s' % task_id)

    req = request_key.get()
    if not req:
        raise MisconfigurationError('No such task request: %s' % task_id)

    account = req.service_account

    # The bot deals with 'none' and 'bot' locally; there is no token to mint.
    if account in ('none', 'bot'):
        return account, None

    # Anything else has to be a service account email. Double check this.
    if not is_service_account(account):
        raise MisconfigurationError('Not a service account email: %s' %
                                    account)

    # 'get_oauth_token_grant' should have prepared a grant token already.
    grant_token = req.service_account_token
    if not grant_token:
        raise MisconfigurationError(
            'The task request %s has no associated service account token' %
            task_id)

    # Extra context that ends up in the Token Server's logs.
    audit_tags = [
        'swarming:bot_id:%s' % bot_id,
        'swarming:task_id:%s' % task_id,
        'swarming:task_name:%s' % req.name,
    ]

    # Trade the grant for the real OAuth token. The bot caches the resulting
    # token internally, so nothing is cached here.
    access_token, expiry = _mint_oauth_token_via_grant(
        grant_token, scopes, audit_tags)

    # Log and return the token.
    expiry_ts = int(utils.datetime_to_timestamp(expiry) / 1e6)
    token = AccessToken(access_token, expiry_ts)
    _check_and_log_token('task associated', account, token)
    return account, token
예제 #31
0
    def post(self, task_id=None):
        """Handles a task update sent by a bot, including performance stats.

        The JSON body is checked against self.ACCEPTED_KEYS and
        self.REQUIRED_KEYS; 'id', 'cost_usd' and 'task_id' are read
        unconditionally, every other value is optional. 'bot_overhead' and
        'isolated_stats' must be provided together or not at all. The update is
        applied through task_scheduler.bot_update_task() and a bot event
        ('task_completed' or 'task_update') is recorded.

        Arguments:
        - task_id: ignored; it is overwritten with the 'task_id' value found in
                   the request body.

        Replies with {'ok': True} on success. Errors are reported through
        self.abort_with_error(): 400 for unexpected keys, inconsistent stats or
        a ValueError raised while updating, 500 when the update must be retried
        or on any other exception.
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS,
                                         self.REQUIRED_KEYS, request,
                                         self.request, 'bot', 'keys')
        if msg:
            self.abort_with_error(400, error=msg)

        # Mandatory values; a missing key raises KeyError here.
        bot_id = request['id']
        cost_usd = request['cost_usd']
        task_id = request['task_id']

        # Optional values; None when absent.
        bot_overhead = request.get('bot_overhead')
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        isolated_stats = request.get('isolated_stats')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # Either both are provided or neither is. Note that isolated_stats is
        # tested for truthiness while bot_overhead is tested against None, so a
        # bot_overhead of 0 counts as set.
        if bool(isolated_stats) != (bot_overhead is not None):
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' %
                                   task_id)
            self.abort_with_error(
                400,
                error='Both bot_overhead and isolated_stats must be set '
                'simultaneously\nbot_overhead: %s\nisolated_stats: %s' %
                (bot_overhead, isolated_stats))

        # NOTE(review): this and the stats parsing below run outside of the try
        # block further down, so a bad task_id or a malformed isolated_stats
        # dict is not turned into a HTTP 400 by the ValueError handler.
        run_result_key = task_pack.unpack_run_result_key(task_id)
        performance_stats = None
        if isolated_stats:
            download = isolated_stats['download']
            upload = isolated_stats['upload']
            # items_cold and items_hot are sent base64 encoded. The upload
            # operation carries no initial_number_items / initial_size.
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead,
                isolated_download=task_result.IsolatedOperation(
                    duration=download['duration'],
                    initial_number_items=download['initial_number_items'],
                    initial_size=download['initial_size'],
                    items_cold=base64.b64decode(download['items_cold']),
                    items_hot=base64.b64decode(download['items_hot'])),
                isolated_upload=task_result.IsolatedOperation(
                    duration=upload['duration'],
                    items_cold=base64.b64decode(upload['items_cold']),
                    items_hot=base64.b64decode(upload['items_hot'])))

        if output is not None:
            # The output is sent base64 encoded.
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                # Fall back to the raw value, with non-ascii characters replaced.
                logging.error('Failed to decode output\n%s\n%r', e, output)
                output = output.encode('ascii', 'replace')
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', e, output)
        if outputs_ref:
            # Convert the plain dict into a FilesRef; its keys are used as
            # keyword arguments.
            outputs_ref = task_request.FilesRef(**outputs_ref)

        try:
            # A falsy state means the update was not applied and must be retried.
            state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                performance_stats=performance_stats)
            if not state:
                logging.info('Failed to update, please retry')
                self.abort_with_error(500,
                                      error='Failed to update, please retry')

            if state in (task_result.State.COMPLETED,
                         task_result.State.TIMED_OUT):
                action = 'task_completed'
            else:
                # An AssertionError raised here lands in the generic handler
                # below and is reported as a HTTP 500.
                assert state in (task_result.State.BOT_DIED,
                                 task_result.State.RUNNING), state
                action = 'task_update'
            bot_management.bot_event(event_type=action,
                                     bot_id=bot_id,
                                     external_ip=self.request.remote_addr,
                                     dimensions=None,
                                     state=None,
                                     version=None,
                                     quarantined=None,
                                     task_id=task_id,
                                     task_name=None)
        except ValueError as e:
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' % e)
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # Presumably what abort_with_error() raises; let it through so the
            # generic handler below does not convert it into another 500.
            raise
        except Exception as e:
            logging.exception('Internal error: %s', e)
            self.abort_with_error(500, error=str(e))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({'ok': True})
예제 #32
0
def get_task_account_token(task_id, bot_id, scopes):
    """Mints an access token for the service account tied to a task.

    Authorization checks are assumed to have been done by the caller.

    Three outcomes are possible, depending on TaskRequest.service_account:
    - 'none': the task uses no service account; ('none', None) is returned.
    - 'bot': the task uses whatever the bot itself uses when calling Swarming;
      ('bot', None) is returned.
    - an email: a token is minted and (<email>, AccessToken) is returned.

    Tokens are minted through the grant token stored on the request when there
    is one (MintOAuthTokenViaGrant, the legacy path that will be deprecated
    after migrating to Realm-based configurations). Without a grant token the
    task must have a realm, and the MintServiceAccountToken RPC is used with
    that realm.

    Args:
      task_id: ID of the task.
      bot_id: ID of the bot that executes the task, for logs.
      scopes: list of requested OAuth scopes.

    Returns:
      (<service account email> or 'bot' or 'none', AccessToken or None).

    Raises:
      PermissionError if the token server forbids the usage.
      MisconfigurationError if the service account is misconfigured.
      InternalError if the RPC fails unexpectedly.
    """
    # Walk from the packed run result id up to the key of its TaskRequest.
    try:
        run_result_key = task_pack.unpack_run_result_key(task_id)
        summary_key = task_pack.run_result_key_to_result_summary_key(
            run_result_key)
        request_key = task_pack.result_summary_key_to_request_key(summary_key)
    except ValueError as err:
        logging.error('Unexpectedly bad task_id: %s', err)
        raise MisconfigurationError('Bad task_id: %s' % task_id)

    req = request_key.get()
    if not req:
        raise MisconfigurationError('No such task request: %s' % task_id)

    account = req.service_account

    # The bot deals with 'none' and 'bot' locally; there is no token to mint.
    if account in ('none', 'bot'):
        return account, None

    # Anything else has to be a service account email. Double check this.
    if not service_accounts_utils.is_service_account(account):
        raise MisconfigurationError('Not a service account email: %s' %
                                    account)

    # Extra context that ends up in the Token Server's logs.
    audit_tags = [
        'swarming:bot_id:%s' % bot_id,
        'swarming:task_id:%s' % task_id,
        'swarming:task_name:%s' % req.name,
    ]

    if req.service_account_token:
        # Legacy path: trade the grant token for the real OAuth token. The bot
        # caches the resulting token internally, so nothing is cached here.
        access_token, expiry = _mint_oauth_token_via_grant(
            req.service_account_token, scopes, audit_tags)
    else:
        # The grant token can be empty only when the task has a realm and the
        # service account was authorized via realm ACLs.
        assert req.realm
        # The ACLs may have changed since the task was accepted, so check
        # again that the account is still allowed to run in the realm.
        pool_cfg = pools_config.get_pool_config(req.pool)
        realms.check_tasks_act_as(req, pool_cfg, enforce=True)
        access_token, expiry = _mint_service_account_token(
            account, req.realm, scopes, audit_tags)

    # Log and return the token.
    expiry_ts = int(utils.datetime_to_timestamp(expiry) / 1e6)
    token = AccessToken(access_token, expiry_ts)
    _check_and_log_token('task associated', account, token)
    return account, token
예제 #33
0
  def get(self, task_id):
    """Renders the page describing one task.

    task_id is first parsed as a TaskResultSummary id and, if that raises
    ValueError, as a TaskRunResult id. Replies 404 when neither parses or when
    the entity does not exist, and 403 unless acl.is_privileged_user().

    Datastore reads are started asynchronously as early as possible and only
    resolved right before the template is rendered.
    """
    try:
      key = task_pack.unpack_result_summary_key(task_id)
      request_key = task_pack.result_summary_key_to_request_key(key)
    except ValueError:
      try:
        key = task_pack.unpack_run_result_key(task_id)
        summary_key = task_pack.run_result_key_to_result_summary_key(key)
        request_key = task_pack.result_summary_key_to_request_key(summary_key)
      except (NotImplementedError, ValueError):
        # abort() is relied upon to raise; 'key' is unbound otherwise.
        self.abort(404, 'Invalid key format.')

    # 'result' can be either a TaskRunResult or TaskResultSummary.
    pending_result = key.get_async()
    pending_request = request_key.get_async()
    result = pending_result.get_result()
    if not result:
      self.abort(404, 'Invalid key.')

    if not acl.is_privileged_user():
      self.abort(403, 'Implement access control based on the user')

    request = pending_request.get_result()

    # Kick off the fetch of the parent and of every child task.
    pending_parent = None
    if request.parent_task_id:
      pending_parent = task_pack.unpack_run_result_key(
          request.parent_task_id).get_async()
    pending_children = []
    for child_id in result.children_task_ids:
      pending_children.append(
          task_pack.unpack_result_summary_key(child_id).get_async())

    bot_id = result.bot_id
    pending_following = None
    pending_previous = None
    if result.started_ts:
      # Neighbor tasks on the same bot, found through their start time. Note
      # that the links will be to the TaskRunResult, not to TaskResultSummary.
      run_cls = task_result.TaskRunResult
      following_query = run_cls.query(
          run_cls.bot_id == bot_id,
          run_cls.started_ts > result.started_ts)
      pending_following = following_query.order(run_cls.started_ts).get_async()
      previous_query = run_cls.query(
          run_cls.bot_id == bot_id,
          run_cls.started_ts < result.started_ts)
      pending_previous = previous_query.order(-run_cls.started_ts).get_async()

    pending_bot = None
    if bot_id:
      pending_bot = bot_management.get_info_key(bot_id).get_async()

    # Everything is in flight; now wait for the results.
    following_task = (
        pending_following.get_result() if pending_following else None)
    previous_task = (
        pending_previous.get_result() if pending_previous else None)
    parent_task = pending_parent.get_result() if pending_parent else None
    children_tasks = [pending.get_result() for pending in pending_children]
    bot = pending_bot.get_result() if pending_bot else None

    params = {
      'bot': bot,
      'children_tasks': children_tasks,
      'is_admin': acl.is_admin(),
      'is_gae_admin': users.is_current_user_admin(),
      'is_privileged_user': acl.is_privileged_user(),
      'following_task': following_task,
      'full_appid': os.environ['APPLICATION_ID'],
      'host_url': self.request.host_url,
      'is_running': result.state == task_result.State.RUNNING,
      'now': utils.utcnow(),
      'parent_task': parent_task,
      'previous_task': previous_task,
      'request': request,
      'task': result,
      'xsrf_token': self.generate_xsrf_token(),
    }
    self.response.write(template.render('swarming/user_task.html', params))
예제 #34
0
def schedule_request(request):
  """Creates and stores all the entities to schedule a new task request.

  The number of entities created is 3: TaskRequest, TaskResultSummary and
  TaskToRun.

  The TaskRequest is saved first as a DB transaction, then TaskResultSummary and
  TaskToRun are saved as a single DB RPC. The Search index is also updated
  in-between.

  If the request is idempotent and a recent enough TaskResultSummary with the
  same properties_hash is found, its values are reused and the TaskToRun is
  taken out of the scheduling queue.

  If request.parent_task_id is set, the parent TaskRunResult and
  TaskResultSummary get the new task id appended to their children_task_ids, in
  a separate transaction.

  Arguments:
  - request: the TaskRequest entity, already saved in the DB.

  Returns:
    TaskResultSummary. TaskToRun is not returned.
  """
  dupe_future = None
  if request.properties.idempotent:
    # Find a previously run task that is also idempotent and completed. Start a
    # query to fetch items that can be used to dedupe the task. See the comment
    # for this property for more details.
    #
    # Do not use "cls.created_ts > oldest" here because this would require a
    # composite index. It's unnecessary because TaskRequest.key is mostly
    # equivalent to decreasing TaskRequest.created_ts, ordering by key works as
    # well and doesn't require a composite index.
    cls = task_result.TaskResultSummary
    h = request.properties.properties_hash
    # The query runs in the background; it is resolved further down.
    dupe_future = cls.query(cls.properties_hash==h).order(cls.key).get_async()

  # At this point, the request is now in the DB but not yet in a mode where it
  # can be triggered or visible. Index it right away so it is searchable. If any
  # of remaining calls in this function fail, the TaskRequest and Search
  # Document will simply point to an incomplete task, which will be ignored.
  #
  # Creates the entities TaskToRun and TaskResultSummary but do not save them
  # yet. TaskRunResult will be created once a bot starts it.
  task = task_to_run.new_task_to_run(request)
  result_summary = task_result.new_result_summary(request)

  # Do not specify a doc_id, as they are guaranteed to be monotonically
  # increasing and searches are done in reverse order, which fits exactly the
  # created_ts ordering. This is useful because DateField is precise to the date
  # (!) and NumberField is signed 32 bits so the best it could do with EPOCH is
  # second resolution up to year 2038.
  index = search.Index(name='requests')
  packed = task_pack.pack_result_summary_key(result_summary.key)
  doc = search.Document(
      fields=[
        search.TextField(name='name', value=request.name),
        search.AtomField(name='id', value=packed),
      ])
  # Even if it fails here, we're still fine, as the task is not "alive" yet.
  search_future = index.put_async([doc])

  # A single timestamp is used for the dedupe age check, the result summary and
  # the parent task entities.
  now = utils.utcnow()

  if dupe_future:
    # Reuse the results!
    dupe_summary = dupe_future.get_result()
    # Refuse tasks older than X days. This is due to the isolate server dropping
    # files. https://code.google.com/p/swarming/issues/detail?id=197
    oldest = now - datetime.timedelta(
        seconds=config.settings().reusable_task_age_secs)
    if dupe_summary and dupe_summary.created_ts > oldest:
      # If there's a bug, commenting out this block is sufficient to disable the
      # functionality.
      # Setting task.queue_number to None removes it from the scheduling.
      task.queue_number = None
      # The tuple presumably lists the properties that must NOT be copied from
      # the previous result -- TODO confirm against _copy_entity.
      _copy_entity(dupe_summary, result_summary, ('created_ts', 'name', 'user'))
      result_summary.properties_hash = None
      result_summary.try_number = 0
      # The cost of the previous run is accounted as savings for this one.
      result_summary.cost_saved_usd = result_summary.cost_usd
      # Only zap after.
      result_summary.costs_usd = []
      result_summary.deduped_from = task_pack.pack_run_result_key(
          dupe_summary.run_result_key)

  # Get parent task details if applicable.
  parent_task_keys = None
  if request.parent_task_id:
    parent_run_key = task_pack.unpack_run_result_key(request.parent_task_id)
    # Both the parent's TaskRunResult and its TaskResultSummary are updated.
    parent_task_keys = [
      parent_run_key,
      task_pack.run_result_key_to_result_summary_key(parent_run_key),
    ]

  result_summary.modified_ts = now

  # Storing these entities makes this task live. It is important at this point
  # that the HTTP handler returns as fast as possible, otherwise the task will
  # be run but the client will not know about it.
  def run():
    ndb.put_multi([result_summary, task])

  def run_parent():
    # This one is slower.
    items = ndb.get_multi(parent_task_keys)
    k = result_summary.task_id
    for item in items:
      item.children_task_ids.append(k)
      item.modified_ts = now
    ndb.put_multi(items)

  # Raising will abort to the caller.
  # The two transactions are started without waiting for each other.
  futures = [datastore_utils.transaction_async(run)]
  if parent_task_keys:
    futures.append(datastore_utils.transaction_async(run_parent))

  try:
    search_future.get_result()
  except search.Error:
    # Do not abort the task, for now search is best effort.
    logging.exception('Put failed')

  for future in futures:
    # Check for failures, it would raise in this case, aborting the call.
    future.get_result()

  stats.add_task_entry(
      'task_enqueued', result_summary.key,
      dimensions=request.properties.dimensions,
      user=request.user)
  return result_summary
예제 #35
0
    def get(self, task_id):
        """Renders the page describing one task.

        task_id is first parsed as a TaskResultSummary id and, if that raises
        ValueError, as a TaskRunResult id. Replies 404 when neither parses or
        when the entity does not exist, and 403 unless
        acl.is_privileged_user().

        Datastore reads are started asynchronously as early as possible and
        only resolved right before the template is rendered.
        """
        try:
            key = task_pack.unpack_result_summary_key(task_id)
            request_key = task_pack.result_summary_key_to_request_key(key)
        except ValueError:
            try:
                key = task_pack.unpack_run_result_key(task_id)
                summary_key = task_pack.run_result_key_to_result_summary_key(key)
                request_key = task_pack.result_summary_key_to_request_key(summary_key)
            except (NotImplementedError, ValueError):
                # abort() is relied upon to raise; 'key' is unbound otherwise.
                self.abort(404, "Invalid key format.")

        # 'result' can be either a TaskRunResult or TaskResultSummary.
        pending_result = key.get_async()
        pending_request = request_key.get_async()
        result = pending_result.get_result()
        if not result:
            self.abort(404, "Invalid key.")

        if not acl.is_privileged_user():
            self.abort(403, "Implement access control based on the user")

        request = pending_request.get_result()

        # Kick off the fetch of the parent and of every child task.
        pending_parent = None
        if request.parent_task_id:
            pending_parent = task_pack.unpack_run_result_key(request.parent_task_id).get_async()
        pending_children = []
        for child_id in result.children_task_ids:
            pending_children.append(task_pack.unpack_result_summary_key(child_id).get_async())

        bot_id = result.bot_id
        pending_following = None
        pending_previous = None
        if result.started_ts:
            # Neighbor tasks on the same bot, found through their start time.
            # Note that the links will be to the TaskRunResult, not to
            # TaskResultSummary.
            run_cls = task_result.TaskRunResult
            following_query = run_cls.query(run_cls.bot_id == bot_id, run_cls.started_ts > result.started_ts)
            pending_following = following_query.order(run_cls.started_ts).get_async()
            previous_query = run_cls.query(run_cls.bot_id == bot_id, run_cls.started_ts < result.started_ts)
            pending_previous = previous_query.order(-run_cls.started_ts).get_async()

        pending_bot = None
        if bot_id:
            pending_bot = bot_management.get_info_key(bot_id).get_async()

        # Everything is in flight; now wait for the results.
        following_task = pending_following.get_result() if pending_following else None
        previous_task = pending_previous.get_result() if pending_previous else None
        parent_task = pending_parent.get_result() if pending_parent else None
        children_tasks = [pending.get_result() for pending in pending_children]
        bot = pending_bot.get_result() if pending_bot else None

        params = {
            "bot": bot,
            "children_tasks": children_tasks,
            "is_admin": acl.is_admin(),
            "is_gae_admin": users.is_current_user_admin(),
            "is_privileged_user": acl.is_privileged_user(),
            "following_task": following_task,
            "full_appid": os.environ["APPLICATION_ID"],
            "host_url": self.request.host_url,
            "is_running": result.state == task_result.State.RUNNING,
            "now": utils.utcnow(),
            "parent_task": parent_task,
            "previous_task": previous_task,
            "request": request,
            "task": result,
            "xsrf_token": self.generate_xsrf_token(),
        }
        self.response.write(template.render("swarming/user_task.html", params))
예제 #36
0
def schedule_request(request):
    """Creates and stores the entities needed to schedule a new task request.

    A TaskToRun and a TaskResultSummary are created for 'request' and stored
    together in one transaction. A Search document is indexed in between, on a
    best effort basis. According to the comments below, the TaskRequest itself
    is expected to already be in the DB when this function runs.

    When request.properties.idempotent is set, a TaskResultSummary with the same
    properties_hash is looked up. If one is found and is newer than
    config.settings().reusable_task_age_secs, its values are copied into the new
    TaskResultSummary and the TaskToRun is taken out of scheduling by setting
    its queue_number to None.

    When request.parent_task_id is set, the new task id is appended to
    children_task_ids of the two parent entities (fetched via the parent run
    result key and the result summary key derived from it), in a second
    transaction.

    Arguments:
    - request: TaskRequest entity to schedule. Presumably already saved in the
               DB by the caller; verify against the call site.

    Returns:
      TaskResultSummary. TaskToRun is not returned.

    Raises:
      Whatever the datastore transaction futures raise. search.Error raised by
      the Search index update is logged and otherwise ignored.
    """
    dupe_future = None
    if request.properties.idempotent:
        # Find a previously run task that is also idempotent and completed. Start a
        # query to fetch items that can be used to dedupe the task. See the comment
        # for this property for more details.
        #
        # Do not use "cls.created_ts > oldest" here because this would require a
        # composite index. It's unnecessary because TaskRequest.key is mostly
        # equivalent to decreasing TaskRequest.created_ts, ordering by key works as
        # well and doesn't require a composite index.
        cls = task_result.TaskResultSummary
        h = request.properties.properties_hash
        # The query is only started here; its result is consumed further down so
        # that the RPC overlaps with the work done in between.
        dupe_future = cls.query(cls.properties_hash == h).order(cls.key).get_async()

    # At this point, the request is now in the DB but not yet in a mode where it
    # can be triggered or visible. Index it right away so it is searchable. If any
    # of remaining calls in this function fail, the TaskRequest and Search
    # Document will simply point to an incomplete task, which will be ignored.
    #
    # Creates the entities TaskToRun and TaskResultSummary but do not save them
    # yet. TaskRunResult will be created once a bot starts it.
    task = task_to_run.new_task_to_run(request)
    result_summary = task_result.new_result_summary(request)

    # Do not specify a doc_id, as they are guaranteed to be monotonically
    # increasing and searches are done in reverse order, which fits exactly the
    # created_ts ordering. This is useful because DateField is precise to the date
    # (!) and NumberField is signed 32 bits so the best it could do with EPOCH is
    # second resolution up to year 2038.
    index = search.Index(name="requests")
    packed = task_pack.pack_result_summary_key(result_summary.key)
    doc = search.Document(
        fields=[search.TextField(name="name", value=request.name), search.AtomField(name="id", value=packed)]
    )
    # Even if it fails here, we're still fine, as the task is not "alive" yet.
    search_future = index.put_async([doc])

    # Single timestamp shared by the age check and by every modified_ts written
    # below.
    now = utils.utcnow()

    if dupe_future:
        # Reuse the results!
        dupe_summary = dupe_future.get_result()
        # Refuse tasks older than X days. This is due to the isolate server dropping
        # files. https://code.google.com/p/swarming/issues/detail?id=197
        oldest = now - datetime.timedelta(seconds=config.settings().reusable_task_age_secs)
        if dupe_summary and dupe_summary.created_ts > oldest:
            # If there's a bug, commenting out this block is sufficient to disable the
            # functionality.
            # Setting task.queue_number to None removes it from the scheduling.
            task.queue_number = None
            # NOTE(review): the tuple presumably lists the properties that must NOT
            # be copied over from the previous summary; confirm against _copy_entity.
            _copy_entity(dupe_summary, result_summary, ("created_ts", "name", "user", "tags"))
            # Reset properties_hash on the new summary. Presumably this keeps a
            # deduped summary from being picked up by the dedupe query above; TODO
            # confirm.
            result_summary.properties_hash = None
            result_summary.try_number = 0
            # cost_usd must be read before costs_usd is cleared below.
            result_summary.cost_saved_usd = result_summary.cost_usd
            # Only zap after.
            result_summary.costs_usd = []
            result_summary.deduped_from = task_pack.pack_run_result_key(dupe_summary.run_result_key)

    # Get parent task details if applicable.
    parent_task_keys = None
    if request.parent_task_id:
        parent_run_key = task_pack.unpack_run_result_key(request.parent_task_id)
        # Both the parent's run result and its result summary get updated.
        parent_task_keys = [parent_run_key, task_pack.run_result_key_to_result_summary_key(parent_run_key)]

    result_summary.modified_ts = now

    # Storing these entities makes this task live. It is important at this point
    # that the HTTP handler returns as fast as possible, otherwise the task will
    # be run but the client will not know about it.
    def run():
        ndb.put_multi([result_summary, task])

    def run_parent():
        # This one is slower.
        items = ndb.get_multi(parent_task_keys)
        k = result_summary.task_id
        # NOTE(review): an entry of 'items' would be None if a parent entity is
        # missing, which would raise AttributeError here. Confirm the caller
        # guarantees the parent exists.
        for item in items:
            item.children_task_ids.append(k)
            item.modified_ts = now
        ndb.put_multi(items)

    # Raising will abort to the caller.
    # Both transactions are started before any of them is waited on.
    futures = [datastore_utils.transaction_async(run)]
    if parent_task_keys:
        futures.append(datastore_utils.transaction_async(run_parent))

    try:
        search_future.get_result()
    except search.Error:
        # Do not abort the task, for now search is best effort.
        logging.exception("Put failed")

    for future in futures:
        # Check for failures, it would raise in this case, aborting the call.
        future.get_result()

    # The stats entry is only added once every transaction has succeeded.
    stats.add_task_entry(
        "task_enqueued", result_summary.key, dimensions=request.properties.dimensions, user=request.user
    )
    return result_summary
예제 #37
0
  def get(self, task_id):
    """Renders the 'swarming/user_task.html' page for 'task_id'.

    'task_id' is first parsed as a result summary id and, if that raises
    ValueError, as a run result id.

    Aborts with HTTP 404 when the id can't be parsed or when no entity is found,
    and with HTTP 403 when the caller is not a privileged user.
    """
    try:
      key = task_pack.unpack_result_summary_key(task_id)
      request_key = task_pack.result_summary_key_to_request_key(key)
    except ValueError:
      # Not a result summary id; try again as a run result id.
      try:
        key = task_pack.unpack_run_result_key(task_id)
        summary_key = task_pack.run_result_key_to_result_summary_key(key)
        request_key = task_pack.result_summary_key_to_request_key(summary_key)
      except (NotImplementedError, ValueError):
        self.abort(404, 'Invalid key format.')

    # Both fetches are started before waiting on the first one. 'result' can be
    # either a TaskRunResult or a TaskResultSummary.
    pending_result = key.get_async()
    pending_request = request_key.get_async()
    result = pending_result.get_result()
    if not result:
      self.abort(404, 'Invalid key.')

    if not acl.is_privileged_user():
      self.abort(403, 'Implement access control based on the user')

    request = pending_request.get_result()

    # Start fetching the parent and the children right away.
    pending_parent = None
    if request.parent_task_id:
      pending_parent = task_pack.unpack_run_result_key(
          request.parent_task_id).get_async()
    pending_children = [
      task_pack.unpack_result_summary_key(child_id).get_async()
      for child_id in result.children_task_ids
    ]

    bot_id = result.bot_id
    pending_following = None
    pending_previous = None
    if result.started_ts:
      # Short alias, the queries are unwieldy otherwise.
      run_cls = task_result.TaskRunResult

      # The neighbor tasks are looked up as TaskRunResult, so the links point to
      # TaskRunResult and not to TaskResultSummary.
      after_query = run_cls.query(
          run_cls.bot_id == bot_id, run_cls.started_ts > result.started_ts)
      pending_following = after_query.order(run_cls.started_ts).get_async()
      before_query = run_cls.query(
          run_cls.bot_id == bot_id, run_cls.started_ts < result.started_ts)
      pending_previous = before_query.order(-run_cls.started_ts).get_async()

    pending_bot = None
    if bot_id:
      pending_bot = bot_management.get_info_key(bot_id).get_async()

    # Every RPC is in flight by now; collect the results.
    following_task = (
        pending_following.get_result() if pending_following else None)
    previous_task = pending_previous.get_result() if pending_previous else None
    parent_task = pending_parent.get_result() if pending_parent else None
    children_tasks = [pending.get_result() for pending in pending_children]
    bot = pending_bot.get_result() if pending_bot else None

    template_values = {
      'bot': bot,
      'children_tasks': children_tasks,
      'is_admin': acl.is_admin(),
      'is_gae_admin': users.is_current_user_admin(),
      'is_privileged_user': acl.is_privileged_user(),
      'following_task': following_task,
      'full_appid': os.environ['APPLICATION_ID'],
      'host_url': self.request.host_url,
      'is_running': result.state == task_result.State.RUNNING,
      'now': utils.utcnow(),
      'parent_task': parent_task,
      'previous_task': previous_task,
      'request': request,
      'task': result,
      'xsrf_token': self.generate_xsrf_token(),
    }
    page = template.render('swarming/user_task.html', template_values)
    self.response.write(page)
예제 #38
0
  def post(self, task_id=None):
    """Processes a task update sent by a bot.

    The parsed body is checked with log_unexpected_subset_keys() against
    ACCEPTED_KEYS and REQUIRED_KEYS, the base64 encoded 'output' is decoded, the
    update is handed to task_scheduler.bot_update_task() and a bot event is
    recorded.

    Arguments:
    - task_id: overwritten by the 'task_id' value found in the request body.

    Replies with {'ok': True} on success. Aborts with HTTP 400 when the key
    check fails or when the update raises ValueError, and with HTTP 500 when the
    update has to be retried or raises any other exception.
    """
    # This code path is much more strict than handshake and poll: invalid keys
    # are not accepted here.
    payload = self.parse_body()
    problem = log_unexpected_subset_keys(
        self.ACCEPTED_KEYS, self.REQUIRED_KEYS, payload, self.request, 'bot',
        'keys')
    if problem:
      self.abort_with_error(400, error=problem)

    # Keys read with [] are mandatory.
    bot_id = payload['id']
    cost_usd = payload['cost_usd']
    task_id = payload['task_id']

    # Keys read with .get() default to None.
    duration = payload.get('duration')
    exit_code = payload.get('exit_code')
    hard_timeout = payload.get('hard_timeout')
    io_timeout = payload.get('io_timeout')
    output = payload.get('output')
    output_chunk_start = payload.get('output_chunk_start')
    outputs_ref = payload.get('outputs_ref')

    run_result_key = task_pack.unpack_run_result_key(task_id)
    if output is not None:
      try:
        output = base64.b64decode(output)
      except UnicodeEncodeError as e:
        logging.error('Failed to decode output\n%s\n%r', e, output)
        output = output.encode('ascii', 'replace')
      except TypeError as e:
        # Keep the output as-is. The error gets logged in ereporter2; replying
        # with a HTTP 500 would only keep the bot in a retry loop.
        logging.error('Failed to decode output\n%s\n%r', e, output)

    try:
      success, completed = task_scheduler.bot_update_task(
          run_result_key, bot_id, output, output_chunk_start,
          exit_code, duration, hard_timeout, io_timeout, cost_usd, outputs_ref)
      if not success:
        logging.info('Failed to update, please retry')
        self.abort_with_error(500, error='Failed to update, please retry')

      if completed:
        action = 'task_completed'
      else:
        action = 'task_update'
      bot_management.bot_event(
          event_type=action, bot_id=bot_id,
          external_ip=self.request.remote_addr, dimensions=None, state=None,
          version=None, quarantined=None, task_id=task_id, task_name=None)
    except ValueError as e:
      ereporter2.log_request(
          request=self.request,
          source='server',
          category='task_failure',
          message='Failed to update task: %s' % e)
      self.abort_with_error(400, error=str(e))
    except webob.exc.HTTPException:
      # HTTP exceptions raised inside the try block propagate untouched.
      raise
    except Exception as e:
      logging.exception('Internal error: %s', e)
      self.abort_with_error(500, error=str(e))

    # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
    # reboots itself to abort the task abruptly. It is useful when a task hangs
    # and the timeout was set too long or the task was superseded by a newer
    # task with more recent executable (e.g. a new Try Server job on a newer
    # patchset on Rietveld).
    self.send_response({'ok': True})
예제 #39
0
 def task(self):
   """Returns task_pack.unpack_run_result_key(self.task_id), or None.

   None is returned when self.task_id is falsy.
   """
   if self.task_id:
     return task_pack.unpack_run_result_key(self.task_id)
   return None
예제 #40
0
    def post(self, task_id=None):
        """Processes a task update sent by a bot, including performance stats.

        The parsed body is checked with log_unexpected_subset_keys() against
        ACCEPTED_KEYS and REQUIRED_KEYS and the bot id is validated through
        bot_auth.validate_bot_id_and_fetch_config(). The optional stats, output,
        outputs_ref and cipd_pins values are converted, then the update is
        handed to task_scheduler.bot_update_task() and a bot event is recorded.

        Arguments:
        - task_id: overwritten by the 'task_id' value found in the request body.

        Replies with {'must_stop': <bool>, 'ok': True}, where must_stop is True
        when the returned state is task_result.State.KILLED. Aborts with HTTP
        400 when the key check fails, when stats are sent without bot_overhead
        or when the update raises ValueError, and with HTTP 500 when the update
        has to be retried or raises any other exception.
        """
        # This code path is much more strict than handshake and poll: invalid
        # keys are not accepted here.
        payload = self.parse_body()
        problem = log_unexpected_subset_keys(
            self.ACCEPTED_KEYS, self.REQUIRED_KEYS, payload, self.request,
            'bot', 'keys')
        if problem:
            self.abort_with_error(400, error=problem)

        bot_id = payload['id']
        task_id = payload['task_id']

        bot_info = bot_management.get_info_key(bot_id).get()
        machine_type = bot_info.machine_type if bot_info else None

        # The bot's self-reported ID has to match the authentication token;
        # auth.AuthorizationError is raised if it doesn't.
        bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

        bot_overhead = payload.get('bot_overhead')
        cipd_pins = payload.get('cipd_pins')
        cipd_stats = payload.get('cipd_stats')
        cost_usd = payload.get('cost_usd', 0)
        duration = payload.get('duration')
        exit_code = payload.get('exit_code')
        hard_timeout = payload.get('hard_timeout')
        io_timeout = payload.get('io_timeout')
        isolated_stats = payload.get('isolated_stats')
        output = payload.get('output')
        output_chunk_start = payload.get('output_chunk_start')
        outputs_ref = payload.get('outputs_ref')

        # Stats are only accepted along with bot_overhead.
        if bot_overhead is None and (isolated_stats or cipd_stats):
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % task_id)
            self.abort_with_error(
                400,
                error=(
                    'isolated_stats and cipd_stats require bot_overhead to be set'
                    '\nbot_overhead: %s\nisolate_stats: %s' %
                    (bot_overhead, isolated_stats)))

        run_result_key = task_pack.unpack_run_result_key(task_id)
        performance_stats = None
        if bot_overhead is not None:
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead)
            if isolated_stats:
                download = isolated_stats.get('download') or {}
                upload = isolated_stats.get('upload') or {}

                def decode_field(stats, name):
                    # A missing or empty value yields None.
                    encoded = stats.get(name)
                    return base64.b64decode(encoded) if encoded else None

                performance_stats.isolated_download = task_result.OperationStats(
                    duration=download.get('duration'),
                    initial_number_items=download.get('initial_number_items'),
                    initial_size=download.get('initial_size'),
                    items_cold=decode_field(download, 'items_cold'),
                    items_hot=decode_field(download, 'items_hot'))
                performance_stats.isolated_upload = task_result.OperationStats(
                    duration=upload.get('duration'),
                    items_cold=decode_field(upload, 'items_cold'),
                    items_hot=decode_field(upload, 'items_hot'))
            if cipd_stats:
                install_duration = cipd_stats.get('duration')
                performance_stats.package_installation = task_result.OperationStats(
                    duration=install_duration)

        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error('Failed to decode output\n%s\n%r', e, output)
                output = output.encode('ascii', 'replace')
            except TypeError as e:
                # Keep the output as-is. The error gets logged in ereporter2;
                # replying with a HTTP 500 would only keep the bot in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', e, output)
        if outputs_ref:
            outputs_ref = task_request.FilesRef(**outputs_ref)

        if cipd_pins:
            client_package = task_request.CipdPackage(
                **cipd_pins['client_package'])
            packages = [
                task_request.CipdPackage(**package_args)
                for package_args in cipd_pins['packages']
            ]
            cipd_pins = task_result.CipdPins(
                client_package=client_package, packages=packages)

        try:
            state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                cipd_pins=cipd_pins,
                performance_stats=performance_stats)
            if not state:
                logging.info('Failed to update, please retry')
                self.abort_with_error(
                    500, error='Failed to update, please retry')

            # Map the resulting task state to the bot event type.
            if state in (task_result.State.COMPLETED,
                         task_result.State.TIMED_OUT):
                action = 'task_completed'
            elif state == task_result.State.KILLED:
                action = 'task_killed'
            else:
                assert state in (task_result.State.BOT_DIED,
                                 task_result.State.RUNNING), state
                action = 'task_update'
            bot_management.bot_event(
                event_type=action,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                authenticated_as=auth.get_peer_identity().to_bytes(),
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                maintenance_msg=None,
                task_id=task_id,
                task_name=None)
        except ValueError as e:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % e)
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # HTTP exceptions raised inside the try block propagate untouched.
            raise
        except Exception as e:
            logging.exception('Internal error: %s', e)
            self.abort_with_error(500, error=str(e))

        must_stop = state == task_result.State.KILLED
        self.send_response({'must_stop': must_stop, 'ok': True})
예제 #41
0
    def post(self, task_id=None):
        """Processes a task update sent by a bot.

        The parsed body is checked with log_unexpected_subset_keys() against
        ACCEPTED_KEYS and REQUIRED_KEYS, the base64 encoded 'output' is decoded,
        the update is handed to task_scheduler.bot_update_task() and a bot event
        is recorded.

        Arguments:
        - task_id: overwritten by the 'task_id' value found in the request body.

        Replies with {"ok": True} on success. Aborts with HTTP 400 when the key
        check fails or when the update raises ValueError, and with HTTP 500 when
        the update has to be retried or raises any other exception.
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS, self.REQUIRED_KEYS, request, self.request, "bot", "keys")
        if msg:
            self.abort_with_error(400, error=msg)

        # Keys read with [] are mandatory, the ones read with .get() default to
        # None.
        bot_id = request["id"]
        cost_usd = request["cost_usd"]
        task_id = request["task_id"]

        duration = request.get("duration")
        exit_code = request.get("exit_code")
        hard_timeout = request.get("hard_timeout")
        io_timeout = request.get("io_timeout")
        output = request.get("output")
        output_chunk_start = request.get("output_chunk_start")
        outputs_ref = request.get("outputs_ref")

        run_result_key = task_pack.unpack_run_result_key(task_id)
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error("Failed to decode output\n%s\n%r", e, output)
                output = output.encode("ascii", "replace")
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error("Failed to decode output\n%s\n%r", e, output)

        try:
            success, completed = task_scheduler.bot_update_task(
                run_result_key,
                bot_id,
                output,
                output_chunk_start,
                exit_code,
                duration,
                hard_timeout,
                io_timeout,
                cost_usd,
                outputs_ref,
            )
            if not success:
                logging.info("Failed to update, please retry")
                self.abort_with_error(500, error="Failed to update, please retry")

            action = "task_completed" if completed else "task_update"
            bot_management.bot_event(
                event_type=action,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                task_id=task_id,
                task_name=None,
            )
        except ValueError as e:
            ereporter2.log_request(
                request=self.request, source="server", category="task_failure", message="Failed to update task: %s" % e
            )
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # An abort issued inside the try block (the "please retry" 500 above)
            # has to propagate as-is. Without this clause the generic handler
            # below would catch it and replace its error message with str(e).
            raise
        except Exception as e:
            # Log the traceback; otherwise unexpected failures leave no trace on
            # the server.
            logging.exception("Internal error: %s", e)
            self.abort_with_error(500, error=str(e))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({"ok": True})