예제 #1
0
    def post(self):
        """Records an event reported by the bot and replies with an empty dict.

        The event type must be listed in self.ALLOWED_EVENTS, otherwise the
        request is aborted with HTTP 400. A 'bot_error' event is additionally
        forwarded to ereporter2.
        """
        processed = self._process()
        payload = processed.request
        event_type = payload.get('event')
        if event_type not in self.ALLOWED_EVENTS:
            logging.error('Unexpected event type')
            self.abort_with_error(400, error='Unsupported event type')
        text = payload.get('message')

        # Persist the event through bot_management so it can be listed on the
        # bot's page.
        bot_management.bot_event(
            event_type=event_type, bot_id=processed.bot_id,
            external_ip=self.request.remote_addr,
            authenticated_as=auth.get_peer_identity().to_bytes(),
            dimensions=processed.dimensions, state=processed.state,
            version=processed.version,
            quarantined=bool(processed.quarantined_msg),
            maintenance_msg=processed.maintenance_msg,
            task_id=None, task_name=None, message=text)

        if event_type == 'bot_error':
            # Errors also go to ereporter2 and therefore end up in the server's
            # hourly report. That report is NOISY, so only actionable issues
            # belong there. The report carries no context of its own, hence the
            # bot URL is appended; it embeds the bot id, which also makes the
            # messages bucket per bot.
            report = '%s\n\nhttps://%s/restricted/bot/%s' % (
                text, app_identity.get_default_version_hostname(),
                processed.bot_id)
            ereporter2.log_request(self.request, source='bot', message=report)
        self.send_response({})
예제 #2
0
    def post(self):
        """Stores a bot lifecycle event and answers with an empty dict.

        Only 'bot_error', 'bot_rebooting' and 'bot_shutdown' are accepted; any
        other event type aborts the request with HTTP 400. 'bot_error' events
        are also reported through ereporter2.
        """
        processed = self._process()
        body, bot, bot_version, bot_state, bot_dimensions, reason = processed

        supported = ('bot_error', 'bot_rebooting', 'bot_shutdown')
        kind = body.get('event')
        if kind not in supported:
            self.abort_with_error(400, error='Unsupported event type')
        text = body.get('message')

        bot_management.bot_event(
            event_type=kind, bot_id=bot, external_ip=self.request.remote_addr,
            dimensions=bot_dimensions, state=bot_state, version=bot_version,
            quarantined=bool(reason), task_id=None, task_name=None,
            message=text)

        if kind == 'bot_error':
            template = 'Bot: https://%s/restricted/bot/%s\nBot error:\n%s'
            report = template % (
                app_identity.get_default_version_hostname(), bot, text)
            ereporter2.log_request(self.request, source='bot', message=report)
        self.send_response({})
예제 #3
0
  def post(self, task_id=None):
    """Handles a task error reported by the bot and kills the task.

    The task_id argument is ignored; the id is always read from the request
    body. The error is recorded as a 'task_error' bot event and sent to
    ereporter2 before the request keys are validated.
    """
    payload = self.parse_body()
    bot_id = payload.get('id')
    task_id = payload.get('task_id', '')
    error_text = payload.get('message', 'unknown')

    bot_management.bot_event(
        event_type='task_error',
        bot_id=bot_id,
        external_ip=self.request.remote_addr,
        dimensions=None,
        state=None,
        version=None,
        quarantined=None,
        task_id=task_id,
        task_name=None,
        message=error_text)

    template = (
        'Bot: https://%s/restricted/bot/%s\n'
        'Task failed: https://%s/user/task/%s\n'
        '%s')
    report = template % (
        app_identity.get_default_version_hostname(), bot_id,
        app_identity.get_default_version_hostname(), task_id,
        error_text)
    ereporter2.log_request(self.request, source='bot', message=report)

    # Unexpected keys are logged by the helper and rejected here.
    key_error = log_unexpected_keys(
        self.EXPECTED_KEYS, payload, self.request, 'bot', 'keys')
    if key_error:
      self.abort_with_error(400, error=key_error)

    # A non-empty return value from bot_kill_task is treated as an error.
    kill_error = task_scheduler.bot_kill_task(
        task_pack.unpack_run_result_key(task_id), bot_id)
    if kill_error:
      logging.error(kill_error)
      self.abort_with_error(400, error=kill_error)
    self.send_response({})
예제 #4
0
    def post(self, task_id=None):
        """Reports a failed task on behalf of the bot, then kills the task.

        The task_id parameter is overwritten by the 'task_id' value of the
        request body. A 'task_error' bot event and an ereporter2 entry are
        emitted before the body keys are checked.
        """
        body = self.parse_body()
        bot_id = body.get('id')
        task_id = body.get('task_id', '')
        details = body.get('message', 'unknown')

        event_fields = {
            'event_type': 'task_error',
            'bot_id': bot_id,
            'external_ip': self.request.remote_addr,
            'dimensions': None,
            'state': None,
            'version': None,
            'quarantined': None,
            'task_id': task_id,
            'task_name': None,
            'message': details,
        }
        bot_management.bot_event(**event_fields)

        report = (
            'Bot: https://%s/restricted/bot/%s\n'
            'Task failed: https://%s/user/task/%s\n'
            '%s'
        ) % (
            app_identity.get_default_version_hostname(), bot_id,
            app_identity.get_default_version_hostname(), task_id,
            details)
        ereporter2.log_request(self.request, source='bot', message=report)

        # Reject the request when the body carries unexpected keys.
        bad_keys = log_unexpected_keys(
            self.EXPECTED_KEYS, body, self.request, 'bot', 'keys')
        if bad_keys:
            self.abort_with_error(400, error=bad_keys)

        # bot_kill_task hands back a message only when something went wrong.
        failure = task_scheduler.bot_kill_task(
            task_pack.unpack_run_result_key(task_id), bot_id)
        if failure:
            logging.error(failure)
            self.abort_with_error(400, error=failure)
        self.send_response({})
예제 #5
0
    def post(self):
        """Registers a bot event and sends back an empty response.

        Accepted event types are 'bot_error', 'bot_rebooting' and
        'bot_shutdown'; anything else aborts with HTTP 400. For 'bot_error' the
        message is also logged to ereporter2 together with the bot's URL.
        """
        payload, bot_id, version, state, dimensions, quarantine_msg = (
            self._process())

        event_name = payload.get("event")
        if event_name not in ("bot_error", "bot_rebooting", "bot_shutdown"):
            self.abort_with_error(400, error="Unsupported event type")
        note = payload.get("message")

        event_fields = {
            "event_type": event_name,
            "bot_id": bot_id,
            "external_ip": self.request.remote_addr,
            "dimensions": dimensions,
            "state": state,
            "version": version,
            "quarantined": bool(quarantine_msg),
            "task_id": None,
            "task_name": None,
            "message": note,
        }
        bot_management.bot_event(**event_fields)

        if event_name == "bot_error":
            hostname = app_identity.get_default_version_hostname()
            report = "Bot: https://%s/restricted/bot/%s\nBot error:\n%s" % (
                hostname, bot_id, note)
            ereporter2.log_request(self.request, source="bot", message=report)
        self.send_response({})
예제 #6
0
def log_unexpected_subset_keys(expected_keys, minimum_keys, actual_keys, request, source, name):
    """Validates keys via has_unexpected_subset_keys and reports any problem.

    Unlike a strict comparison, some keys are optional: the check is delegated
    to has_unexpected_subset_keys(expected_keys, minimum_keys, actual_keys,
    name). Useful to catch typos.

    Returns:
      Whatever has_unexpected_subset_keys returned; when it is truthy it has
      also been logged through ereporter2.log_request.
    """
    problem = has_unexpected_subset_keys(expected_keys, minimum_keys, actual_keys, name)
    if not problem:
        return problem
    ereporter2.log_request(request, source=source, message=problem)
    return problem
예제 #7
0
def log_unexpected_subset_keys(expected_keys, minimum_keys, actual_keys,
                               request, source, name):
    """Reports to ereporter2 when the key check yields an error message.

    The key check itself is done by has_unexpected_subset_keys, which takes
    both the full set of expected keys and the minimum required set, so
    optional keys are accepted. This is important to catch typos.

    Returns:
      The value returned by has_unexpected_subset_keys, logged first if truthy.
    """
    outcome = has_unexpected_subset_keys(
        expected_keys, minimum_keys, actual_keys, name)
    if outcome:
        ereporter2.log_request(request, message=outcome, source=source)
    return outcome
예제 #8
0
    def post(self, task_id=None):
        """Authenticates the bot, records its task error and kills the task.

        The task_id parameter is replaced by the 'task_id' value from the
        request body. bot_auth.validate_bot_id_and_fetch_config is called
        before anything is recorded; per the original comment it raises
        auth.AuthorizationError when the self-reported bot id does not match
        the authentication token.
        """
        body = self.parse_body()
        bot_id = body.get('id')
        task_id = body.get('task_id', '')
        details = body.get('message', 'unknown')

        # machine_type comes from the stored bot info, when there is one.
        stored_info = bot_management.get_info_key(bot_id).get()
        machine_type = stored_info.machine_type if stored_info else None

        # The bot's self-reported ID has to match its authentication token.
        bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

        bot_management.bot_event(
            event_type='task_error', bot_id=bot_id,
            external_ip=self.request.remote_addr,
            authenticated_as=auth.get_peer_identity().to_bytes(),
            dimensions=None, state=None, version=None, quarantined=None,
            maintenance_msg=None, task_id=task_id, task_name=None,
            message=details)

        template = (
            'Bot: https://%s/restricted/bot/%s\n'
            'Task failed: https://%s/user/task/%s\n'
            '%s')
        report = template % (
            app_identity.get_default_version_hostname(), bot_id,
            app_identity.get_default_version_hostname(), task_id,
            details)
        ereporter2.log_request(self.request, source='bot', message=report)

        # Unexpected keys in the body are rejected.
        bad_keys = log_unexpected_keys(
            self.EXPECTED_KEYS, body, self.request, 'bot', 'keys')
        if bad_keys:
            self.abort_with_error(400, error=bad_keys)

        # A truthy value from bot_kill_task is an error message.
        failure = task_scheduler.bot_kill_task(
            task_pack.unpack_run_result_key(task_id), bot_id)
        if failure:
            logging.error(failure)
            self.abort_with_error(400, error=failure)
        self.send_response({})
예제 #9
0
  def post(self):
    """Accepts an error report sent through the old API.

    Unexpected keys are logged but do not reject the request. A 'bot_error'
    bot event is stored only when the body carries a bot id; the ereporter2
    entry is written in every case.
    """
    body = self.parse_body()
    log_unexpected_keys(
        self.EXPECTED_KEYS, body, self.request, 'bot', 'keys')
    details = body.get('message', 'unknown')
    bot_id = body.get('id')

    if bot_id:
      bot_management.bot_event(
          event_type='bot_error',
          bot_id=bot_id,
          external_ip=self.request.remote_addr,
          dimensions=None,
          state=None,
          version=None,
          quarantined=None,
          task_id=None,
          task_name=None,
          message=details)

    # The ereporter2 event is logged unconditionally, bot id or not.
    template = 'Bot: https://%s/restricted/bot/%s\nOld API error:\n%s'
    report = template % (
        app_identity.get_default_version_hostname(), bot_id, details)
    ereporter2.log_request(self.request, source='bot', message=report)
    self.send_response({})
예제 #10
0
  def post(self):
    """Saves an event sent by the bot and returns an empty response.

    The event must be 'bot_error', 'bot_rebooting' or 'bot_shutdown'; other
    values abort the request with HTTP 400. A 'bot_error' is also logged to
    ereporter2 with a link to the bot.
    """
    processed = self._process()
    body, bot_id, version, state, dimensions, quarantine_reason = processed

    event_kind = body.get('event')
    if event_kind not in ('bot_error', 'bot_rebooting', 'bot_shutdown'):
      self.abort_with_error(400, error='Unsupported event type')
    text = body.get('message')

    bot_management.bot_event(
        event_type=event_kind,
        bot_id=bot_id,
        external_ip=self.request.remote_addr,
        dimensions=dimensions,
        state=state,
        version=version,
        quarantined=bool(quarantine_reason),
        task_id=None,
        task_name=None,
        message=text)

    if event_kind == 'bot_error':
      report = 'Bot: https://%s/restricted/bot/%s\nBot error:\n%s' % (
          app_identity.get_default_version_hostname(), bot_id, text)
      ereporter2.log_request(self.request, source='bot', message=report)
    self.send_response({})
예제 #11
0
    def _process(self):
        """Fetches bot info and settings, does authorization and quarantine checks.

        The request body is parsed, the bot id is extracted from the 'id'
        dimension, server-side dimensions from the bot group config are applied
        on top of the bot-provided ones, and then a sequence of checks decides
        whether the bot is quarantined.

        Returns:
          _ProcessResult instance, see its fields for more info. Its
          quarantined_msg field is only assigned here when a quarantine reason
          was found.

        Raises:
          auth.AuthorizationError if bot's credentials are invalid.
        """
        request = self.parse_body()
        version = request.get('version', None)

        # 'or {}' also covers keys that are present but hold a falsy value.
        dimensions = request.get('dimensions') or {}
        state = request.get('state') or {}
        # The bot id is only accepted when dimensions['id'] is a list holding
        # exactly one unicode string; otherwise bot_id stays None.
        bot_id = None
        if dimensions.get('id'):
            dimension_id = dimensions['id']
            if (isinstance(dimension_id, list) and len(dimension_id) == 1
                    and isinstance(dimension_id[0], unicode)):
                bot_id = dimensions['id'][0]

        lease_expiration_ts = None
        machine_type = None
        if bot_id:
            logging.debug('Fetching bot info and settings')
            # Both entities are requested in a single ndb.get_multi call. Note
            # that bot_settings is only bound inside this branch.
            bot_info, bot_settings = ndb.get_multi([
                bot_management.get_info_key(bot_id),
                bot_management.get_settings_key(bot_id)
            ])
            if bot_info:
                lease_expiration_ts = bot_info.lease_expiration_ts
                machine_type = bot_info.machine_type

        # Make sure bot self-reported ID matches the authentication token. Raises
        # auth.AuthorizationError if not.
        logging.debug('Fetching bot group config')
        bot_group_cfg = bot_auth.validate_bot_id_and_fetch_config(
            bot_id, machine_type)

        # The server side dimensions from bot_group_cfg override bot-provided ones.
        # If both server side config and bot report some dimension, server side
        # config wins. We still emit an warning if bot tries to supply the dimension
        # and it disagrees with the server defined one. Note that this may happen
        # on a first poll after server side config for a bot has changed. The bot
        # doesn't know about new server-assigned dimensions yet in this case. Also
        # don't report ['default'], bot sends it in the handshake before it knows
        # anything at all.
        for dim_key, from_cfg in bot_group_cfg.dimensions.iteritems():
            # Both sides are sorted so the comparison ignores value order.
            from_bot = sorted(dimensions.get(dim_key) or [])
            from_cfg = sorted(from_cfg)
            if from_bot and from_bot != ['default'] and from_bot != from_cfg:
                logging.warning(
                    'Dimensions in bots.cfg don\'t match ones provided by the bot\n'
                    'bot_id: "%s", key: "%s", from_bot: %s, from_cfg: %s',
                    bot_id, dim_key, from_bot, from_cfg)
            dimensions[dim_key] = from_cfg

        # Fill in all result fields except 'quarantined_msg'.
        result = _ProcessResult(request=request,
                                bot_id=bot_id,
                                version=version,
                                state=state,
                                dimensions=dimensions,
                                bot_group_cfg=bot_group_cfg,
                                lease_expiration_ts=lease_expiration_ts,
                                maintenance_msg=state.get('maintenance'))

        # The bot may decide to "self-quarantine" itself. Accept both via
        # dimensions or via state. See bot_management._BotCommon.quarantined for
        # more details.
        if (bool(dimensions.get('quarantined'))
                or bool(state.get('quarantined'))):
            result.quarantined_msg = 'Bot self-quarantined'
            return result

        quarantined_msg = None
        # Use a dummy 'for' to be able to break early from the block. The first
        # check that produces a message wins; later checks are skipped.
        for _ in [0]:

            quarantined_msg = has_unexpected_keys(self.EXPECTED_KEYS, request,
                                                  'keys')
            if quarantined_msg:
                break

            quarantined_msg = has_missing_keys(self.REQUIRED_STATE_KEYS, state,
                                               'state')
            if quarantined_msg:
                break

            if not bot_id:
                quarantined_msg = 'Missing bot id'
                break
            if not dimensions.get('pool'):
                quarantined_msg = 'Missing \'pool\' dimension'
                break

            # Every key must pass config.validate_dimension_key and every value
            # must be a list whose items pass config.validate_dimension_value.
            if not all(
                    config.validate_dimension_key(key)
                    and isinstance(values, list) and all(
                        config.validate_dimension_value(value)
                        for value in values)
                    for key, values in dimensions.iteritems()):
                quarantined_msg = ('Invalid dimensions type:\n%s' %
                                   json.dumps(dimensions,
                                              sort_keys=True,
                                              indent=2,
                                              separators=(',', ': ')))
                break

        if quarantined_msg:
            # Automatic quarantines are reported to ereporter2 with the bot's URL.
            line = 'Quarantined Bot\nhttps://%s/restricted/bot/%s\n%s' % (
                app_identity.get_default_version_hostname(), bot_id,
                quarantined_msg)
            ereporter2.log_request(self.request, source='bot', message=line)
            result.quarantined_msg = quarantined_msg
            return result

        # Look for admin enforced quarantine.
        # bot_settings is bound here: reaching this point requires a truthy
        # bot_id, since an empty one yields a quarantine message that returns
        # above.
        if bool(bot_settings and bot_settings.quarantined):
            result.quarantined_msg = 'Quarantined by admin'
            return result

        # TODO(maruel): Parallelise.
        # get_result() is called right away on the value returned by
        # assert_bot_async, so this call is waited on before returning.
        task_queues.assert_bot_async(dimensions).get_result()
        return result
예제 #12
0
  def post(self, task_id=None):
    """Applies a task update sent by the bot and acknowledges it.

    The task_id parameter is replaced by the 'task_id' value of the request
    body. On success a 'task_completed' or 'task_update' bot event is stored
    and {'ok': True} is sent back. A ValueError from the update is reported to
    ereporter2 and answered with HTTP 400; any other non-HTTP exception is
    answered with HTTP 500.
    """
    # Stricter than handshake and poll: invalid keys are rejected on this code
    # path.
    payload = self.parse_body()
    key_error = log_unexpected_subset_keys(
        self.ACCEPTED_KEYS, self.REQUIRED_KEYS, payload, self.request, 'bot',
        'keys')
    if key_error:
      self.abort_with_error(400, error=key_error)

    # Keys read by subscript.
    bot_id = payload['id']
    cost_usd = payload['cost_usd']
    task_id = payload['task_id']

    # Keys read with .get().
    duration = payload.get('duration')
    exit_code = payload.get('exit_code')
    hard_timeout = payload.get('hard_timeout')
    io_timeout = payload.get('io_timeout')
    output = payload.get('output')
    output_chunk_start = payload.get('output_chunk_start')
    outputs_ref = payload.get('outputs_ref')

    run_result_key = task_pack.unpack_run_result_key(task_id)
    if output is not None:
      try:
        output = base64.b64decode(output)
      except UnicodeEncodeError as err:
        logging.error('Failed to decode output\n%s\n%r', err, output)
        output = output.encode('ascii', 'replace')
      except TypeError as err:
        # Keep the output untouched. The error gets logged in ereporter2, and
        # answering with HTTP 500 would only keep the bot in a retry loop.
        logging.error('Failed to decode output\n%s\n%r', err, output)

    try:
      success, completed = task_scheduler.bot_update_task(
          run_result_key, bot_id, output, output_chunk_start,
          exit_code, duration, hard_timeout, io_timeout, cost_usd, outputs_ref)
      if not success:
        logging.info('Failed to update, please retry')
        self.abort_with_error(500, error='Failed to update, please retry')

      if completed:
        action = 'task_completed'
      else:
        action = 'task_update'
      bot_management.bot_event(
          event_type=action,
          bot_id=bot_id,
          external_ip=self.request.remote_addr,
          dimensions=None,
          state=None,
          version=None,
          quarantined=None,
          task_id=task_id,
          task_name=None)
    except ValueError as err:
      ereporter2.log_request(
          request=self.request, source='server', category='task_failure',
          message='Failed to update task: %s' % err)
      self.abort_with_error(400, error=str(err))
    except webob.exc.HTTPException:
      # HTTP exceptions pass through instead of being turned into a 500 below.
      raise
    except Exception as err:
      logging.exception('Internal error: %s', err)
      self.abort_with_error(500, error=str(err))

    # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
    # reboots itself to abort the task abruptly. It is useful when a task hangs
    # and the timeout was set too long or the task was superseded by a newer
    # task with more recent executable (e.g. a new Try Server job on a newer
    # patchset on Rietveld).
    self.send_response({'ok': True})
예제 #13
0
    def post(self, task_id=None):
        """Processes a task update from the bot and replies {'ok': True}.

        The task_id parameter is overwritten with the 'task_id' value found in
        the request body. After a successful update a bot event is stored,
        named 'task_completed' or 'task_update'. A ValueError raised by the
        update is logged to ereporter2 and turned into HTTP 400; other
        non-HTTP exceptions become HTTP 500.
        """
        # This code path is much stricter than handshake and poll: invalid
        # keys are not accepted.
        body = self.parse_body()
        bad_keys = log_unexpected_subset_keys(
            self.ACCEPTED_KEYS, self.REQUIRED_KEYS, body, self.request, 'bot',
            'keys')
        if bad_keys:
            self.abort_with_error(400, error=bad_keys)

        # Read by subscript.
        bot_id = body['id']
        cost_usd = body['cost_usd']
        task_id = body['task_id']

        # Read with .get().
        duration = body.get('duration')
        exit_code = body.get('exit_code')
        hard_timeout = body.get('hard_timeout')
        io_timeout = body.get('io_timeout')
        output = body.get('output')
        output_chunk_start = body.get('output_chunk_start')
        outputs_ref = body.get('outputs_ref')

        run_result_key = task_pack.unpack_run_result_key(task_id)
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as exc:
                logging.error('Failed to decode output\n%s\n%r', exc, output)
                output = output.encode('ascii', 'replace')
            except TypeError as exc:
                # The output is stored as-is. ereporter2 will carry the error;
                # replying with HTTP 500 would only trap the bot in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', exc, output)

        try:
            success, completed = task_scheduler.bot_update_task(
                run_result_key, bot_id, output, output_chunk_start, exit_code,
                duration, hard_timeout, io_timeout, cost_usd, outputs_ref)
            if not success:
                logging.info('Failed to update, please retry')
                self.abort_with_error(
                    500, error='Failed to update, please retry')

            if completed:
                event_name = 'task_completed'
            else:
                event_name = 'task_update'
            bot_management.bot_event(
                event_type=event_name, bot_id=bot_id,
                external_ip=self.request.remote_addr, dimensions=None,
                state=None, version=None, quarantined=None, task_id=task_id,
                task_name=None)
        except ValueError as exc:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % exc)
            self.abort_with_error(400, error=str(exc))
        except webob.exc.HTTPException:
            # Re-raised unchanged so the generic handler below never sees it.
            raise
        except Exception as exc:
            logging.exception('Internal error: %s', exc)
            self.abort_with_error(500, error=str(exc))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({'ok': True})
예제 #14
0
    def _process(self):
        """Parses the request, authorizes the bot and runs the quarantine checks.

        The bot id is taken from the 'id' dimension, the server-side dimensions
        of the bot group config are applied on top of the bot-provided ones,
        and a sequence of validations decides whether the bot has to be
        quarantined automatically.

        When no quarantine reason was found earlier, one synchronous get() is
        issued for the bot settings entity.

        Returns:
          _ProcessResult instance, see its fields for more info. Its
          quarantined_msg field is only assigned here when a quarantine reason
          was found.

        Raises:
          auth.AuthorizationError if bot's credentials are invalid.
        """
        request = self.parse_body()
        version = request.get('version', None)

        # 'or {}' also covers keys that are present but hold a falsy value.
        dimensions = request.get('dimensions') or {}
        state = request.get('state') or {}
        # The bot id is only accepted when dimensions['id'] is a list holding
        # exactly one unicode string; otherwise bot_id stays None.
        bot_id = None
        if dimensions.get('id'):
            dimension_id = dimensions['id']
            if (isinstance(dimension_id, list) and len(dimension_id) == 1
                    and isinstance(dimension_id[0], unicode)):
                bot_id = dimensions['id'][0]

        # Make sure bot self-reported ID matches the authentication token. Raises
        # auth.AuthorizationError if not.
        bot_group_cfg = bot_auth.validate_bot_id_and_fetch_config(bot_id)

        # The server side dimensions from bot_group_cfg override bot-provided ones.
        # If both server side config and bot report some dimension, server side
        # config wins. We still emit an error if bot tries to supply the dimension
        # and it disagrees with the server defined one. Don't report ['default'] as
        # an error, bot sends it in the handshake before it knows anything at all.
        for dim_key, from_cfg in bot_group_cfg.dimensions.iteritems():
            # Both sides are sorted so the comparison ignores value order.
            from_bot = sorted(dimensions.get(dim_key) or [])
            from_cfg = sorted(from_cfg)
            if from_bot and from_bot != ['default'] and from_bot != from_cfg:
                logging.error(
                    'Dimensions in bots.cfg doesn\'t match ones provided by the bot\n'
                    'bot_id: "%s", key: "%s", from_bot: %s, from_cfg: %s',
                    bot_id, dim_key, from_bot, from_cfg)
            dimensions[dim_key] = from_cfg

        # Fill in all result fields except 'quarantined_msg'.
        result = _ProcessResult(request=request,
                                bot_id=bot_id,
                                version=version,
                                state=state,
                                dimensions=dimensions,
                                bot_group_cfg=bot_group_cfg)

        # The bot may decide to "self-quarantine" itself. Accept both via
        # dimensions or via state. See bot_management._BotCommon.quarantined for
        # more details.
        if (bool(dimensions.get('quarantined'))
                or bool(state.get('quarantined'))):
            result.quarantined_msg = 'Bot self-quarantined'
            return result

        quarantined_msg = None
        # Use a dummy 'for' to be able to break early from the block. The first
        # check that produces a message wins; later checks are skipped.
        for _ in [0]:

            quarantined_msg = has_unexpected_keys(self.EXPECTED_KEYS, request,
                                                  'keys')
            if quarantined_msg:
                break

            quarantined_msg = has_missing_keys(self.REQUIRED_STATE_KEYS, state,
                                               'state')
            if quarantined_msg:
                break

            if not bot_id:
                quarantined_msg = 'Missing bot id'
                break
            if not dimensions.get('pool'):
                quarantined_msg = 'Missing \'pool\' dimension'
                break

            # Keys must be unicode strings matching task_request.DIMENSION_KEY_RE
            # and every value must be a list of unicode strings.
            if not all(
                    isinstance(key, unicode)
                    and re.match(task_request.DIMENSION_KEY_RE, key)
                    and isinstance(values, list) and all(
                        isinstance(value, unicode) for value in values)
                    for key, values in dimensions.iteritems()):
                quarantined_msg = ('Invalid dimensions type:\n%s' %
                                   json.dumps(dimensions,
                                              sort_keys=True,
                                              indent=2,
                                              separators=(',', ': ')))
                break

            # Quarantine when the count computed by dimensions_powerset_count
            # exceeds task_to_run.MAX_DIMENSIONS.
            dimensions_count = task_to_run.dimensions_powerset_count(
                dimensions)
            if dimensions_count > task_to_run.MAX_DIMENSIONS:
                quarantined_msg = 'Dimensions product %d is too high' % dimensions_count
                break

            # A missing 'lease_expiration_ts' reads as None through .get() and
            # passes; the subscript below only runs when the key holds a value
            # of another type.
            if not isinstance(state.get('lease_expiration_ts'),
                              (None.__class__, int)):
                quarantined_msg = (
                    'lease_expiration_ts (%r) must be int or None' %
                    (state['lease_expiration_ts']))
                break

        if quarantined_msg:
            # Automatic quarantines are reported to ereporter2 with the bot's URL.
            line = 'Quarantined Bot\nhttps://%s/restricted/bot/%s\n%s' % (
                app_identity.get_default_version_hostname(), bot_id,
                quarantined_msg)
            ereporter2.log_request(self.request, source='bot', message=line)
            result.quarantined_msg = quarantined_msg
            return result

        # Look for admin enforced quarantine.
        # bot_id is truthy here: an empty one yields 'Missing bot id' above.
        bot_settings = bot_management.get_settings_key(bot_id).get()
        if bool(bot_settings and bot_settings.quarantined):
            result.quarantined_msg = 'Quarantined by admin'
            return result

        return result
예제 #15
0
    def _process(self):
        """Parses the request body and decides whether the bot is quarantined.

        The bot id is taken from the 'id' dimension, which must be a list
        holding exactly one unicode string. The request is then run through a
        sequence of checks; the first one that fails provides the quarantine
        message, which is also reported to ereporter2.

        When no quarantine reason was found earlier, one synchronous get() is
        issued for the bot settings entity.

        Returns:
          tuple(request, bot_id, version, state, dimensions, quarantined_msg)
          where quarantined_msg is None when the bot is not quarantined.
        """
        request = self.parse_body()
        version = request.get('version', None)

        # Use 'or {}' instead of a .get() default: the default only applies
        # when the key is absent, so an explicit null in the body would leave
        # None here and the .get() calls below would raise AttributeError.
        dimensions = request.get('dimensions') or {}
        state = request.get('state') or {}
        bot_id = None
        if dimensions.get('id'):
            dimension_id = dimensions['id']
            if (isinstance(dimension_id, list) and len(dimension_id) == 1
                    and isinstance(dimension_id[0], unicode)):
                bot_id = dimensions['id'][0]

        # The bot may decide to "self-quarantine" itself. Accept both via
        # dimensions or via state. See bot_management._BotCommon.quarantined for
        # more details.
        if (bool(dimensions.get('quarantined'))
                or bool(state.get('quarantined'))):
            return request, bot_id, version, state, dimensions, 'Bot self-quarantined'

        quarantined_msg = None
        # Use a dummy 'for' to be able to break early from the block. The first
        # check that produces a message wins; later checks are skipped.
        for _ in [0]:

            quarantined_msg = has_unexpected_keys(self.EXPECTED_KEYS, request,
                                                  'keys')
            if quarantined_msg:
                break

            quarantined_msg = has_missing_keys(self.REQUIRED_STATE_KEYS, state,
                                               'state')
            if quarantined_msg:
                break

            if not bot_id:
                quarantined_msg = 'Missing bot id'
                break

            # Keys must be unicode strings and every value must be a list of
            # unicode strings.
            if not all(
                    isinstance(key, unicode) and isinstance(values, list)
                    and all(isinstance(value, unicode) for value in values)
                    for key, values in dimensions.iteritems()):
                quarantined_msg = ('Invalid dimensions type:\n%s' %
                                   json.dumps(dimensions,
                                              sort_keys=True,
                                              indent=2,
                                              separators=(',', ': ')))
                break

            # Quarantine when the count computed by dimensions_powerset_count
            # exceeds task_to_run.MAX_DIMENSIONS.
            dimensions_count = task_to_run.dimensions_powerset_count(
                dimensions)
            if dimensions_count > task_to_run.MAX_DIMENSIONS:
                quarantined_msg = 'Dimensions product %d is too high' % dimensions_count
                break

        if quarantined_msg:
            # Automatic quarantines are reported to ereporter2 with the bot's URL.
            line = 'Quarantined Bot\nhttps://%s/restricted/bot/%s\n%s' % (
                app_identity.get_default_version_hostname(), bot_id,
                quarantined_msg)
            ereporter2.log_request(self.request, source='bot', message=line)
            return request, bot_id, version, state, dimensions, quarantined_msg

        # Look for admin enforced quarantine.
        # bot_id is truthy here: an empty one yields 'Missing bot id' above.
        bot_settings = bot_management.get_settings_key(bot_id).get()
        if bool(bot_settings and bot_settings.quarantined):
            return request, bot_id, version, state, dimensions, 'Quarantined by admin'

        return request, bot_id, version, state, dimensions, None
예제 #16
0
    def _process(self):
        """Parses the request body and decides whether the bot must be quarantined.

        Checks are evaluated in order and the first one that fails wins:
          1. the bot self-quarantined through its dimensions or its state;
          2. the request has unexpected keys;
          3. the state lacks required keys;
          4. no usable bot id was found in dimensions['id'];
          5. dimensions is not a mapping of unicode to list of unicode;
          6. the dimensions powerset count exceeds task_to_run.MAX_DIMENSIONS;
          7. the stored bot settings flag the bot as quarantined.

        Failures of checks 2 to 6 are also logged through ereporter2.

        Calls .get() once on the bot settings key, and only when checks 1 to 6
        all passed.

        Returns:
          tuple(request, bot_id, version, state, dimensions, quarantined_msg).
          quarantined_msg is None when no check failed.
        """
        request = self.parse_body()
        version = request.get("version", None)
        dimensions = request.get("dimensions", {})
        state = request.get("state", {})

        # The bot id is only accepted when dimensions['id'] is a list holding
        # exactly one unicode string.
        bot_id = None
        id_values = dimensions.get("id")
        if id_values and isinstance(id_values, list) and len(id_values) == 1:
            if isinstance(id_values[0], unicode):
                bot_id = id_values[0]

        def reply(message):
            # Every exit path returns the same tuple; only the message differs.
            return request, bot_id, version, state, dimensions, message

        # The bot may decide to "self-quarantine" itself, either via dimensions
        # or via state. See bot_management._BotCommon.quarantined for more
        # details.
        if dimensions.get("quarantined") or state.get("quarantined"):
            return reply("Bot self-quarantined")

        def find_problem():
            """Returns the message of the first failed validation, or None."""
            problem = has_unexpected_keys(self.EXPECTED_KEYS, request, "keys")
            if problem:
                return problem

            problem = has_missing_keys(self.REQUIRED_STATE_KEYS, state, "state")
            if problem:
                return problem

            if not bot_id:
                return "Missing bot id"

            badly_typed = any(
                not isinstance(key, unicode)
                or not isinstance(values, list)
                or any(not isinstance(value, unicode) for value in values)
                for key, values in dimensions.iteritems()
            )
            if badly_typed:
                pretty = json.dumps(
                    dimensions, sort_keys=True, indent=2, separators=(",", ": "))
                return "Invalid dimensions type:\n%s" % pretty

            count = task_to_run.dimensions_powerset_count(dimensions)
            if count > task_to_run.MAX_DIMENSIONS:
                return "Dimensions product %d is too high" % count
            return None

        quarantined_msg = find_problem()
        if quarantined_msg:
            # Report the automatic quarantine, including the bot's URL since the
            # report carries no other context.
            hostname = app_identity.get_default_version_hostname()
            line = "Quarantined Bot\nhttps://%s/restricted/bot/%s\n%s" % (
                hostname, bot_id, quarantined_msg)
            ereporter2.log_request(self.request, source="bot", message=line)
            return reply(quarantined_msg)

        # Look for admin enforced quarantine.
        bot_settings = bot_management.get_settings_key(bot_id).get()
        if bot_settings and bot_settings.quarantined:
            return reply("Quarantined by admin")

        return reply(None)
예제 #17
0
    def post(self, task_id=None):
        """Applies a task update reported by a bot and records the bot event.

        The request body may only contain keys from ACCEPTED_KEYS and must
        contain every key in REQUIRED_KEYS.

        Args:
          task_id: ignored; it is overwritten with the 'task_id' value from the
              request body.

        Replies with {"ok": True} on success. Errors are reported through
        abort_with_error():
          - 400 when the keys are invalid, or when the update raises ValueError
            (the latter is also logged to ereporter2 as a task_failure);
          - 500 with "Failed to update, please retry" when
            task_scheduler.bot_update_task() reports an unsuccessful update;
          - 500 with str(exception) on any other exception, which is logged with
            its traceback.
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(
            self.ACCEPTED_KEYS, self.REQUIRED_KEYS, request, self.request, "bot",
            "keys")
        if msg:
            self.abort_with_error(400, error=msg)

        # Required keys: a missing one raises KeyError.
        bot_id = request["id"]
        cost_usd = request["cost_usd"]
        task_id = request["task_id"]

        # Optional keys: None when absent.
        duration = request.get("duration")
        exit_code = request.get("exit_code")
        hard_timeout = request.get("hard_timeout")
        io_timeout = request.get("io_timeout")
        output = request.get("output")
        output_chunk_start = request.get("output_chunk_start")
        outputs_ref = request.get("outputs_ref")

        run_result_key = task_pack.unpack_run_result_key(task_id)
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error("Failed to decode output\n%s\n%r", e, output)
                output = output.encode("ascii", "replace")
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error("Failed to decode output\n%s\n%r", e, output)

        # Set when the scheduler reports an unsuccessful update. The abort is
        # issued after the try block on purpose: abort_with_error() is expected to
        # interrupt the handler by raising, and raising inside the try block would
        # let the generic handler below replace the retry message with str() of
        # that exception.
        must_retry = False
        try:
            success, completed = task_scheduler.bot_update_task(
                run_result_key,
                bot_id,
                output,
                output_chunk_start,
                exit_code,
                duration,
                hard_timeout,
                io_timeout,
                cost_usd,
                outputs_ref,
            )
            if success:
                action = "task_completed" if completed else "task_update"
                bot_management.bot_event(
                    event_type=action,
                    bot_id=bot_id,
                    external_ip=self.request.remote_addr,
                    dimensions=None,
                    state=None,
                    version=None,
                    quarantined=None,
                    task_id=task_id,
                    task_name=None,
                )
            else:
                must_retry = True
        except ValueError as e:
            ereporter2.log_request(
                request=self.request,
                source="server",
                category="task_failure",
                message="Failed to update task: %s" % e,
            )
            self.abort_with_error(400, error=str(e))
        except Exception as e:
            # Keep the traceback in the logs; the bot only gets str(e).
            logging.exception("Internal error: %s", e)
            self.abort_with_error(500, error=str(e))

        if must_retry:
            self.abort_with_error(500, error="Failed to update, please retry")

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({"ok": True})
예제 #18
0
    def post(self, task_id=None):
        """Processes a task update posted by a bot and replies with the outcome.

        Steps, in order: validate the request keys, check the self-reported bot
        id against the authentication token, convert the optional stats / outputs
        / CIPD payloads into their entity types, forward the update to
        task_scheduler.bot_update_task() and record the matching bot event.

        Args:
          task_id: ignored; replaced by the 'task_id' value of the request body.

        Replies with {'must_stop': <bool>, 'ok': True}, where must_stop is True
        when the resulting task state is KILLED. Failures go through
        abort_with_error(): 400 for invalid keys, for stats sent without
        bot_overhead, or for a ValueError raised by the update; 500 when the
        update returns a falsy state or raises any other exception.
        """
        # This path is much stricter than handshake and poll: requests carrying
        # invalid keys are rejected.
        request = self.parse_body()
        key_error = log_unexpected_subset_keys(
            self.ACCEPTED_KEYS, self.REQUIRED_KEYS, request, self.request, 'bot',
            'keys')
        if key_error:
            self.abort_with_error(400, error=key_error)

        bot_id = request['id']
        task_id = request['task_id']

        bot_info = bot_management.get_info_key(bot_id).get()
        machine_type = bot_info.machine_type if bot_info else None

        # The self-reported bot id must match the authentication token; this
        # raises auth.AuthorizationError otherwise.
        bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

        bot_overhead = request.get('bot_overhead')
        cipd_pins = request.get('cipd_pins')
        cipd_stats = request.get('cipd_stats')
        cost_usd = request.get('cost_usd', 0)
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        isolated_stats = request.get('isolated_stats')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # Stats are only meaningful together with the bot overhead.
        if bot_overhead is None and (isolated_stats or cipd_stats):
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % task_id)
            reason = (
                'isolated_stats and cipd_stats require bot_overhead to be set'
                '\nbot_overhead: %s\nisolate_stats: %s' %
                (bot_overhead, isolated_stats))
            self.abort_with_error(400, error=reason)

        run_result_key = task_pack.unpack_run_result_key(task_id)

        performance_stats = None
        if bot_overhead is not None:
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead)

            if isolated_stats:
                download = isolated_stats.get('download') or {}
                upload = isolated_stats.get('upload') or {}

                def decoded(stats, field):
                    # Base64-decodes stats[field]; None when missing or empty.
                    raw = stats.get(field)
                    return base64.b64decode(raw) if raw else None

                performance_stats.isolated_download = task_result.OperationStats(
                    duration=download.get('duration'),
                    initial_number_items=download.get('initial_number_items'),
                    initial_size=download.get('initial_size'),
                    items_cold=decoded(download, 'items_cold'),
                    items_hot=decoded(download, 'items_hot'))
                performance_stats.isolated_upload = task_result.OperationStats(
                    duration=upload.get('duration'),
                    items_cold=decoded(upload, 'items_cold'),
                    items_hot=decoded(upload, 'items_hot'))

            if cipd_stats:
                performance_stats.package_installation = (
                    task_result.OperationStats(
                        duration=cipd_stats.get('duration')))

        if output is not None:
            try:
                output = base64.b64decode(output)
            except (UnicodeEncodeError, TypeError) as err:
                logging.error('Failed to decode output\n%s\n%r', err, output)
                if isinstance(err, UnicodeEncodeError):
                    output = output.encode('ascii', 'replace')
                # On TypeError the output is saved as-is. The error will be
                # logged in ereporter2 and returning a HTTP 500 would only force
                # the bot to stay in a retry loop.

        if outputs_ref:
            outputs_ref = task_request.FilesRef(**outputs_ref)

        if cipd_pins:
            client_package = task_request.CipdPackage(
                **cipd_pins['client_package'])
            packages = [
                task_request.CipdPackage(**package)
                for package in cipd_pins['packages']
            ]
            cipd_pins = task_result.CipdPins(
                client_package=client_package, packages=packages)

        try:
            state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                cipd_pins=cipd_pins,
                performance_stats=performance_stats)
            if not state:
                logging.info('Failed to update, please retry')
                self.abort_with_error(
                    500, error='Failed to update, please retry')

            # Map the resulting task state to the bot event to record.
            if state in (task_result.State.COMPLETED,
                         task_result.State.TIMED_OUT):
                action = 'task_completed'
            elif state == task_result.State.KILLED:
                action = 'task_killed'
            else:
                assert state in (task_result.State.BOT_DIED,
                                 task_result.State.RUNNING), state
                action = 'task_update'

            bot_management.bot_event(
                event_type=action,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                authenticated_as=auth.get_peer_identity().to_bytes(),
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                maintenance_msg=None,
                task_id=task_id,
                task_name=None)
        except ValueError as err:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % err)
            self.abort_with_error(400, error=str(err))
        except webob.exc.HTTPException:
            # Let HTTP errors raised inside the try block go through untouched
            # instead of being rewrapped by the generic handler below.
            raise
        except Exception as err:
            logging.exception('Internal error: %s', err)
            self.abort_with_error(500, error=str(err))

        must_stop = state == task_result.State.KILLED
        self.send_response({'must_stop': must_stop, 'ok': True})
예제 #19
0
    def post(self, task_id=None):
        """Processes a task update posted by a bot and acknowledges it.

        Steps, in order: validate the request keys, check that bot_overhead and
        isolated_stats are either both present or both absent, convert the
        optional stats / outputs payloads into their entity types, forward the
        update to task_scheduler.bot_update_task() and record the matching bot
        event.

        Args:
          task_id: ignored; replaced by the 'task_id' value of the request body.

        Replies with {'ok': True} on success. Failures go through
        abort_with_error(): 400 for invalid keys, for inconsistent stats, or for
        a ValueError raised by the update; 500 when the update returns a falsy
        state or raises any other exception.
        """
        # This path is much stricter than handshake and poll: requests carrying
        # invalid keys are rejected.
        request = self.parse_body()
        key_error = log_unexpected_subset_keys(
            self.ACCEPTED_KEYS, self.REQUIRED_KEYS, request, self.request, 'bot',
            'keys')
        if key_error:
            self.abort_with_error(400, error=key_error)

        bot_id = request['id']
        cost_usd = request['cost_usd']
        task_id = request['task_id']

        bot_overhead = request.get('bot_overhead')
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        isolated_stats = request.get('isolated_stats')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # bot_overhead and isolated_stats only make sense as a pair.
        has_stats = bool(isolated_stats)
        has_overhead = bot_overhead is not None
        if has_stats != has_overhead:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % task_id)
            reason = (
                'Both bot_overhead and isolated_stats must be set '
                'simultaneously\nbot_overhead: %s\nisolated_stats: %s' %
                (bot_overhead, isolated_stats))
            self.abort_with_error(400, error=reason)

        run_result_key = task_pack.unpack_run_result_key(task_id)

        performance_stats = None
        if isolated_stats:
            download = isolated_stats['download']
            upload = isolated_stats['upload']
            # items_cold / items_hot arrive base64-encoded.
            download_op = task_result.IsolatedOperation(
                duration=download['duration'],
                initial_number_items=download['initial_number_items'],
                initial_size=download['initial_size'],
                items_cold=base64.b64decode(download['items_cold']),
                items_hot=base64.b64decode(download['items_hot']))
            upload_op = task_result.IsolatedOperation(
                duration=upload['duration'],
                items_cold=base64.b64decode(upload['items_cold']),
                items_hot=base64.b64decode(upload['items_hot']))
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead,
                isolated_download=download_op,
                isolated_upload=upload_op)

        if output is not None:
            try:
                output = base64.b64decode(output)
            except (UnicodeEncodeError, TypeError) as err:
                logging.error('Failed to decode output\n%s\n%r', err, output)
                if isinstance(err, UnicodeEncodeError):
                    output = output.encode('ascii', 'replace')
                # On TypeError the output is saved as-is. The error will be
                # logged in ereporter2 and returning a HTTP 500 would only force
                # the bot to stay in a retry loop.

        if outputs_ref:
            outputs_ref = task_request.FilesRef(**outputs_ref)

        try:
            state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                performance_stats=performance_stats)
            if not state:
                logging.info('Failed to update, please retry')
                self.abort_with_error(
                    500, error='Failed to update, please retry')

            # Map the resulting task state to the bot event to record.
            if state in (task_result.State.COMPLETED,
                         task_result.State.TIMED_OUT):
                action = 'task_completed'
            else:
                assert state in (task_result.State.BOT_DIED,
                                 task_result.State.RUNNING), state
                action = 'task_update'

            bot_management.bot_event(
                event_type=action,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                task_id=task_id,
                task_name=None)
        except ValueError as err:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % err)
            self.abort_with_error(400, error=str(err))
        except webob.exc.HTTPException:
            # Let HTTP errors raised inside the try block go through untouched
            # instead of being rewrapped by the generic handler below.
            raise
        except Exception as err:
            logging.exception('Internal error: %s', err)
            self.abort_with_error(500, error=str(err))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({'ok': True})