Example #1
0
def get_dimensions(botobj):
    """Returns the dimensions dict produced by bot_config.get_dimensions().

    In load test mode only the host 'id' and a 'load_test' marker are
    returned. Failures are never raised to the caller: they are logged and
    converted into a dimensions dict flagged as 'quarantined' that carries
    the error text.
    """
    try:
        if not _in_load_test_mode():
            # bot_config is an administrator provided script; importing it
            # could have side-effects on startup, hence the late import.
            from config import bot_config
            dims = bot_config.get_dimensions(botobj)
            if isinstance(dims, dict):
                return dims
            raise ValueError('Unexpected type %s' % dims.__class__)

        # Minimal set of dimensions so the bot doesn't run tasks by error.
        host_dims = os_utilities.get_dimensions()
        return {
            'id': host_dims['id'],
            'load_test': ['1'],
        }
    except Exception as config_err:
        logging.exception('get_dimensions() failed')
        try:
            # First fallback: the default dimensions, flagged as quarantined.
            fallback = os_utilities.get_dimensions()
            fallback['error'] = [str(config_err)]
            fallback['quarantined'] = ['1']
            return fallback
        except Exception as fallback_err:
            # Last resort: even the default dimensions failed, so report only
            # an id and the tail of the traceback.
            try:
                botid = os_utilities.get_hostname_short()
            except Exception as hostname_err:
                botid = 'error_%s' % str(hostname_err)
            details = '%s\n%s' % (
                fallback_err, traceback.format_exc()[-2048:])
            return {
                'id': [botid],
                'error': [details],
                'quarantined': ['1'],
            }
Example #2
0
    def test_run_bot(self):
        """Exercises the bot_main.run_bot() loop with stubbed hooks.

        Does not use self.bot. The poll_server stub raises once the bot's
        sleep streak equals 5; the post_error stub checks the reported
        message and raises Foo, which is what terminates run_bot().
        """

        class Foo(Exception):
            pass

        def fake_poll_server(botobj, _):
            streak = botobj.state["sleep_streak"]
            self.assertEqual(botobj.remote, self.server)
            if streak != 5:
                return False
            raise Exception("Jumping out of the loop")

        def fake_post_error(botobj, e):
            self.assertEqual(self.server, botobj._remote)
            reported = e.splitlines()
            # The message comes first, followed by the traceback header.
            self.assertEqual("Jumping out of the loop", reported[0])
            self.assertEqual("Traceback (most recent call last):", reported[1])
            raise Foo("Necessary to get out of the loop")

        self.mock(time, "time", lambda: 126.0)
        self.mock(bot_main, "poll_server", fake_poll_server)
        self.mock(bot.Bot, "post_error", fake_post_error)
        self.mock(bot_main, "get_remote", lambda: self.server)

        ping_request = (
            "https://localhost:1/swarming/api/v1/bot/server_ping",
            {},
            "foo",
            None,
        )
        self.expected_requests([ping_request])

        with self.assertRaises(Foo):
            bot_main.run_bot(None)

        # After the run, the bot id in the environment is the short hostname.
        expected_id = os_utilities.get_hostname_short()
        self.assertEqual(expected_id, os.environ["SWARMING_BOT_ID"])
Example #3
0
def get_dimensions(botobj):
  """Returns the dict from bot_config.get_dimensions(), or a quarantined one.

  When the bot runs in load test mode, only 'id' and 'load_test' are
  reported. If anything raises, the exception is logged and a dimensions
  dict containing 'error' and 'quarantined' entries is returned instead.
  """
  try:
    if _in_load_test_mode():
      # Keep the set of dimensions minimal so no task runs by error.
      return {
        'id': os_utilities.get_dimensions()['id'],
        'load_test': ['1'],
      }

    # Imported late on purpose: this administrator provided script could have
    # side-effects on startup.
    from config import bot_config
    result = bot_config.get_dimensions(botobj)
    if isinstance(result, dict):
      return result
    raise ValueError('Unexpected type %s' % result.__class__)
  except Exception as first_error:
    logging.exception('get_dimensions() failed')
    try:
      result = os_utilities.get_dimensions()
      result['error'] = [str(first_error)]
      result['quarantined'] = ['1']
      return result
    except Exception as second_error:
      # The default dimensions are unavailable too; build the smallest
      # possible answer by hand.
      try:
        botid = os_utilities.get_hostname_short()
      except Exception as third_error:
        botid = 'error_%s' % str(third_error)
      trace_tail = traceback.format_exc()[-2048:]
      return {
        'id': [botid],
        'error': ['%s\n%s' % (second_error, trace_tail)],
        'quarantined': ['1'],
      }
def get_dimensions(bot=None):
    """Returns dict with the bot's dimensions.

    The dimensions are what are used to select the bot that can run each task.

    By default, the bot id will be automatically selected based on
    the hostname with os_utilities.get_dimensions(). This method
    overrides the default id returned by os_utilities.get_dimensions().

    Assume the bot's working directory is like BOT_ROOT/bot_23/
    we will parse the id "23" from the directory name and append it to the
    hostname to form the bot id, so the bot id would look like
    chromeos-server31-23. When the working directory does not contain a
    "/bot_<digits>" component, the bare short hostname is used as the id.

    See https://github.com/luci/luci-py/blob/master/appengine/
    swarming/doc/Magic-Values.md

    @param bot: Unused by this implementation.

    @returns: Dict with the bot's dimensions.

    """
    d = os_utilities.get_dimensions()
    # Raw string so that '\d' reaches the regex engine instead of being an
    # invalid string escape. The greedy leading '.*' means the last
    # '/bot_<digits>' component of the path is the one that is captured.
    m = re.match(r'.*/bot_([\d]+).*', os.getcwd())
    suffix = '-' + m.group(1) if m else ''
    d[u'id'] = [os_utilities.get_hostname_short() + suffix]
    return d
Example #5
0
def get_dimensions(bot):
  # pylint: disable=line-too-long
  """Returns dict with the bot's dimensions.

  The dimensions are what are used to select the bot that can run each task.

  The bot id is built as '<short hostname>--<basename of bot.base_dir>' and
  overrides the 'id' returned by os_utilities.get_dimensions(). When that id
  is a key of _BOT_DEVICE_MAP, the mapped entry's dimensions are merged in
  and the module-level _DEVICE is lazily created from the first 'device_ip';
  its build fingerprint, hardware and product are then reported as the
  'build', 'hardware' and 'product' dimensions.

  The dimensions returned here will be joined with server defined dimensions
  (extracted from bots.cfg config file based on the bot id). Server defined
  dimensions override the ones provided by the bot. See bot.Bot.dimensions for
  more information.

  See https://github.com/luci/luci-py/tree/master/appengine/swarming/doc/Magic-Values.md.

  Arguments:
  - bot: bot.Bot instance. See ../api/bot.py. NOTE(review): the upstream
    template allows None here, but this implementation reads bot.base_dir, so
    passing None raises AttributeError.

  Returns:
    The dimensions dict, without the 'cores', 'cpu', 'gpu' and 'machine_type'
    entries.
  """
  global _DEVICE
  dimensions = os_utilities.get_dimensions()
  # The bot base directory is formatted like <HOME>/bots/<Id>
  bot_id = '%s--%s' % (
      os_utilities.get_hostname_short(),
      os.path.basename(bot.base_dir))
  dimensions[u'id'] = [bot_id]
  if bot_id in _BOT_DEVICE_MAP:
    dimensions.update(_BOT_DEVICE_MAP[bot_id].dimensions)
    if not _DEVICE:
      # A mapping without 'device_ip' simply yields no device instead of a
      # KeyError.
      device_ip = dimensions.get(u'device_ip')
      # use first device_ip in list of device IPs for pulling device attributes
      if device_ip:
        assert isinstance(device_ip, (list, tuple)), repr(device_ip)
        _DEVICE = Device(device_ip[0], bot_id)
    if _DEVICE:
      # Explicit single-item lists, like every other dimension value; the
      # previous trailing commas built 1-tuples.
      dimensions[u'build'] = [_DEVICE.build_fingerprint]
      dimensions[u'hardware'] = [_DEVICE.hardware]
      dimensions[u'product'] = [_DEVICE.product]
  # TODO(jonesmi): don't strip these anymore after swarming fix goes in for limit on # dimensions
  # pop() rather than del: a dimension that was never reported (e.g. no gpu
  # detected) must not make the whole call fail.
  for stripped in (u'cores', u'cpu', u'gpu', u'machine_type'):
    dimensions.pop(stripped, None)
  return dimensions
Example #6
0
    def test_run_bot(self):
        """Runs bot_main.run_bot() against stubs until it is forced to exit.

        self.bot is not involved. Polling is stubbed to fail once the sleep
        streak equals 5, and error reporting is stubbed to verify the
        message and then raise Foo so that run_bot() stops.
        """
        class Foo(Exception):
            pass

        def stub_poll_server(botobj, _):
            current_streak = botobj.state['sleep_streak']
            self.assertEqual(botobj.remote, self.server)
            if current_streak != 5:
                return False
            raise Exception('Jumping out of the loop')

        def stub_post_error(botobj, e):
            self.assertEqual(self.server, botobj._remote)
            error_lines = e.splitlines()
            self.assertEqual('Jumping out of the loop', error_lines[0])
            self.assertEqual(
                'Traceback (most recent call last):', error_lines[1])
            raise Foo('Necessary to get out of the loop')

        # Install every stub before the loop is started.
        self.mock(time, 'time', lambda: 126.0)
        self.mock(bot_main, 'poll_server', stub_poll_server)
        self.mock(bot.Bot, 'post_error', stub_post_error)
        self.mock(bot_main, 'get_remote', lambda: self.server)

        server_ping = (
            'https://localhost:1/swarming/api/v1/bot/server_ping',
            {},
            'foo',
            None,
        )
        self.expected_requests([server_ping])

        with self.assertRaises(Foo):
            bot_main.run_bot(None)

        # The bot id in the environment must match the short hostname.
        hostname = os_utilities.get_hostname_short()
        self.assertEqual(hostname, os.environ['SWARMING_BOT_ID'])
Example #7
0
  def test_run_bot(self):
    """Checks the run_bot() loop end to end using stubs; self.bot is unused.

    The stubbed poll_server raises when the sleep streak equals 5. The
    stubbed post_error validates what was reported and raises Foo, the only
    way out of the loop.
    """
    class Foo(Exception):
      pass

    def polling_stub(botobj, _):
      streak_now = botobj.state['sleep_streak']
      self.assertEqual(botobj.remote, self.server)
      if streak_now != 5:
        return False
      raise Exception('Jumping out of the loop')

    def error_stub(botobj, e):
      self.assertEqual(self.server, botobj._remote)
      text_lines = e.splitlines()
      self.assertEqual('Jumping out of the loop', text_lines[0])
      self.assertEqual('Traceback (most recent call last):', text_lines[1])
      raise Foo('Necessary to get out of the loop')

    self.mock(time, 'time', lambda: 126.0)
    self.mock(bot_main, 'poll_server', polling_stub)
    self.mock(bot.Bot, 'post_error', error_stub)
    self.mock(bot_main, 'get_remote', lambda: self.server)

    ping_url = 'https://localhost:1/swarming/api/v1/bot/server_ping'
    self.expected_requests([(ping_url, {}, 'foo', None)])

    with self.assertRaises(Foo):
      bot_main.run_bot(None)

    # The environment must now carry the short hostname as the bot id.
    self.assertEqual(
        os_utilities.get_hostname_short(), os.environ['SWARMING_BOT_ID'])
Example #8
0
def load_and_run(
    in_file, swarming_server, is_grpc, cost_usd_hour, start, out_file,
    run_isolated_flags, bot_file, auth_params_file):
  """Loads the task's metadata, prepares auth environment and executes the task.

  This may throw all sorts of exceptions in case of failure. It's up to the
  caller to trap them. These shall be considered 'internal_failure' instead of
  'failure' from a TaskRunResult standpoint.

  Whatever happens, the task result (possibly None when an unexpected
  exception escaped before a result existed) is serialized as JSON to
  out_file.

  Arguments:
  - in_file: path handed to TaskDetails.load() to read the task's metadata.
  - swarming_server: server the remote client talks to; when is_grpc is set,
    this value is used as the gRPC proxy instead.
  - is_grpc: whether swarming_server designates a gRPC proxy.
  - cost_usd_hour, start, run_isolated_flags, bot_file: forwarded unchanged to
    run_command().
  - out_file: path where the task result is written; its directory is used as
    the work directory and must already exist.
  - auth_params_file: when set, a bot_auth.AuthSystem is started from it and
    its context is exposed as LUCI_CONTEXT['local_auth'].
  """
  auth_system = None
  local_auth_context = None
  task_result = None
  work_dir = os.path.dirname(out_file)

  def handler(sig, _):
    logging.info('Got signal %s', sig)
    raise ExitSignal(sig)

  try:
    with subprocess42.set_signal_handler([SIG_BREAK_OR_TERM], handler):
      # The work directory is guaranteed to exist since it was created by
      # bot_main.py and contains the manifest. Temporary files will be
      # downloaded there. It's bot_main.py that will delete the directory
      # afterward. Tests are not run from there.
      if not os.path.isdir(work_dir):
        raise InternalError('%s expected to exist' % work_dir)

      # Raises InternalError on errors.
      task_details = TaskDetails.load(in_file)

      # This will start a thread that occasionally reads bot authentication
      # headers from 'auth_params_file'. It will also optionally launch local
      # HTTP server that serves OAuth tokens to the task processes. We put
      # location of this service into a file referenced by LUCI_CONTEXT env var
      # below.
      if auth_params_file:
        try:
          auth_system = bot_auth.AuthSystem(auth_params_file)
          local_auth_context = auth_system.start()
        except bot_auth.AuthSystemError as e:
          raise InternalError('Failed to init auth: %s' % e)

      # Override LUCI_CONTEXT['local_auth']. If the task is not using auth,
      # do NOT inherit existing local_auth (if its there). Kick it out by
      # passing None.
      context_edits = {
        'local_auth': local_auth_context
      }

      # Extend existing LUCI_CONTEXT['swarming'], if any.
      if task_details.secret_bytes is not None:
        swarming = luci_context.read('swarming') or {}
        swarming['secret_bytes'] = task_details.secret_bytes
        context_edits['swarming'] = swarming

      # Returns bot authentication headers dict or raises InternalError.
      def headers_cb():
        try:
          if auth_system:
            return auth_system.get_bot_headers()
          return (None, None) # A timeout of "None" means "don't use auth"
        except bot_auth.AuthSystemError as e:
          raise InternalError('Failed to grab bot auth headers: %s' % e)

      # Make a client that can send request to Swarming using bot auth headers.
      grpc_proxy = ''
      if is_grpc:
        grpc_proxy = swarming_server
        swarming_server = ''
      # The hostname and work dir provided here don't really matter, since the
      # task runner is always called with a specific versioned URL.
      remote = remote_client.createRemoteClient(
          swarming_server, headers_cb, os_utilities.get_hostname_short(),
          work_dir, grpc_proxy)
      remote.initialize()

      # Let AuthSystem know it can now send RPCs to Swarming (to grab OAuth
      # tokens). There's a circular dependency here! AuthSystem will be
      # indirectly relying on its own 'get_bot_headers' method to authenticate
      # RPCs it sends through the provided client.
      if auth_system:
        auth_system.set_remote_client(remote)

      # Auth environment is up, start the command. task_result is dumped to
      # disk in 'finally' block.
      with luci_context.stage(_tmpdir=work_dir, **context_edits) as ctx_file:
        task_result = run_command(
            remote, task_details, work_dir, cost_usd_hour,
            start, run_isolated_flags, bot_file, ctx_file)
  except (ExitSignal, InternalError, remote_client.InternalError) as e:
    # This normally means run_command() didn't get the chance to run, as it
    # itself traps exceptions and will report accordingly. In this case, we want
    # the parent process to send the message instead.
    if not task_result:
      # BaseException.message only exists on Python 2. Fall back to str(e)
      # when the attribute is missing so that reporting the failure cannot
      # itself fail with AttributeError.
      reason = getattr(e, 'message', None)
      if reason is None:
        reason = str(e)
      task_result = {
        u'exit_code': -1,
        u'hard_timeout': False,
        u'io_timeout': False,
        u'must_signal_internal_failure': str(reason or 'unknown error'),
        u'version': OUT_VERSION,
      }

  finally:
    # We've found tests to delete the working directory work_dir when quitting,
    # causing an exception here. Try to recreate the directory if necessary.
    if not os.path.isdir(work_dir):
      os.mkdir(work_dir)
    if auth_system:
      auth_system.stop()
    # json.dump() emits text: a binary-mode file makes it raise TypeError on
    # Python 3. The output contains no raw newline, so text mode writes the
    # same bytes as before.
    with open(out_file, 'w') as f:
      json.dump(task_result, f)