def test_task_idempotent_variable(self):
    """Checks dedup picks the most recent task when the reuse window grows.

    Covers the edge case where GlobalConfig.reusable_task_age_secs is modified
    between tasks, which relies on TaskResultSummary.order(TRS.key) working.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)

    def set_reusable_age(secs):
        # Persist a new reuse window in the stored settings.
        settings = config.settings()
        settings.reusable_task_age_secs = secs
        settings.store()

    set_reusable_age(10)

    # The first idempotent task runs at the initial mocked time.
    self._task_ran_successfully()

    # Ten seconds later the first task is too old to be reused, so the second
    # task is scheduled and runs.
    second_ts = self.mock_now(self.now, 10)
    second_task_id = self._task_ran_successfully()

    # With the wider window either earlier task could be reused.
    set_reusable_age(100)

    # The third task must be deduped against the second (most recent) task,
    # which shows that the ordering works correctly.
    third_ts = self.mock_now(self.now, 20)
    self._task_deduped(
        third_ts, second_task_id, '1d69ba3ea8008810', second_ts)
Exemple #2
0
    def test_has_capacity_BotEvent(self):
        """Checks has_capacity() against a recent BotEvent once BotInfo is gone.

        Capacity must still be reported until bot_death_timeout_secs has
        elapsed since the event, for both exact and '|' (OR) dimensions.
        """
        # Disable the memcache code path to confirm the DB based behavior.
        self.mock(task_queues, 'probably_has_capacity', lambda *_: None)

        bot_id = 'id1'
        exact_dims = {u'pool': [u'default'], u'os': [u'Ubuntu-16.04']}
        or_dims = {
            u'pool': [u'default'],
            u'os': [u'Ubuntu-14.04|Ubuntu-16.04'],
        }

        def check_both(expected):
            # Both dimension flavors must agree on the capacity answer.
            self.assertEqual(expected,
                             bot_management.has_capacity(exact_dims))
            self.assertEqual(expected, bot_management.has_capacity(or_dims))

        event_dims = {
            'id': [bot_id],
            'pool': ['default'],
            'os': ['Ubuntu', 'Ubuntu-16.04'],
        }
        _bot_event(event_type='request_sleep', dimensions=event_dims)
        self.assertEqual(True, bot_management.has_capacity(exact_dims))

        # Delete the BotInfo, so the bot will disappear. The capacity is still
        # found due to a recent BotEvent with this dimension.
        bot_management.get_info_key(bot_id).delete()
        check_both(True)

        # One second before the death timeout the event still counts.
        self.mock_now(self.now, config.settings().bot_death_timeout_secs - 1)
        check_both(True)

        # At the death timeout the event no longer provides capacity.
        self.mock_now(self.now, config.settings().bot_death_timeout_secs)
        check_both(False)
Exemple #3
0
def apply_server_property_defaults(properties):
    """Fills ndb task properties with default values read from server settings.

    Mutates `properties` in place. Values already set on `properties` are kept;
    only missing (falsy) fields are filled from the server settings.

    Args:
      properties: task properties object exposing `inputs_ref` and
          `cipd_input` attributes.

    Returns:
      None. Does nothing when no settings are available.
    """
    # Fetch the settings exactly once; the same object drives every default
    # applied below.
    cfg = config.settings()
    if not cfg:
        return

    # Isolate defaults apply only when both the server and the namespace are
    # configured.
    if cfg.isolate.default_server and cfg.isolate.default_namespace:
        properties.inputs_ref = properties.inputs_ref or task_request.FilesRef(
        )
        properties.inputs_ref.isolatedserver = (
            properties.inputs_ref.isolatedserver or cfg.isolate.default_server)
        properties.inputs_ref.namespace = (properties.inputs_ref.namespace
                                           or cfg.isolate.default_namespace)

    # CIPD defaults are only filled when the task already has a cipd_input.
    if cfg.HasField('cipd') and properties.cipd_input:
        properties.cipd_input.server = (properties.cipd_input.server
                                        or cfg.cipd.default_server)
        properties.cipd_input.client_package = (
            properties.cipd_input.client_package or task_request.CipdPackage())
        properties.cipd_input.client_package.package_name = (
            properties.cipd_input.client_package.package_name
            or cfg.cipd.default_client_package.package_name)
        properties.cipd_input.client_package.version = (
            properties.cipd_input.client_package.version
            or cfg.cipd.default_client_package.version)
    def test_task_idempotent_variable(self):
        """Verifies the newest eligible task is reused after a config change.

        Exercises the edge case where GlobalConfig.reusable_task_age_secs is
        modified between runs; this ensures TaskResultSummary.order(TRS.key)
        works.
        """
        self.mock(random, 'getrandbits', lambda _: 0x88)

        def store_reuse_window(seconds):
            # Update and save the reuse window in the stored settings.
            current = config.settings()
            current.reusable_task_age_secs = seconds
            current.store()

        store_reuse_window(10)

        # First task is idempotent.
        self._task_ran_successfully()

        # Second task is scheduled; the first one is too old to be reused.
        ts_second = self.mock_now(self.now, 10)
        id_second = self._task_ran_successfully()

        # From now on any of the two tasks could be reused.
        store_reuse_window(100)

        # The third task has to be deduped against the second task, i.e. the
        # most recent one.
        ts_third = self.mock_now(self.now, 20)
        self._task_deduped(ts_third, id_second, '1d69ba3ea8008810',
                           ts_second)
Exemple #5
0
  def get(self):
    """Syncs the Machine Provider server URL and refreshes lease entities.

    Does nothing besides logging when MP support is disabled in the settings.
    """
    # Turn off the ndb in-process cache for this request.
    ndb.get_context().set_cache_policy(lambda _: False)
    if not config.settings().mp.enabled:
      logging.info('MP support is disabled')
      return

    configured_server = config.settings().mp.server
    if configured_server:
      stored = machine_provider.MachineProviderConfiguration().cached()
      # Only write when the configured URL differs from the stored one.
      if configured_server != stored.instance_url:
        logging.info(
            'Updating Machine Provider server to %s', configured_server)
        stored.modify(updated_by='', instance_url=configured_server)

    lease_management.ensure_entities_exist()
    lease_management.drain_excess()
Exemple #6
0
 def test_config_conflict(self):
     """Posting a config with a stale keyid must report an update conflict.

     The stored google_analytics value must stay empty after the rejected
     update.
     """
     self.set_as_admin()
     self.app.get('/restricted/config')
     # TODO(maruel): Use beautifulsoup?
     # A key id one below the current one simulates a stale form.
     stale_key_id = config.settings().key.integer_id() - 1
     form = {
         'bot_death_timeout_secs': 10 * 60,
         'google_analytics': 'foobar',
         'keyid': str(stale_key_id),
         'reusable_task_age_secs': 30,
         'xsrf_token': self.get_xsrf_token(),
     }
     self.assertEqual('', config.settings().google_analytics)
     response = self.app.post('/restricted/config', form)
     self.assertIn('Update conflict', response)
     self.assertEqual('', config.settings().google_analytics)
Exemple #7
0
 def test_config_conflict(self):
   """Checks that a POST carrying an outdated keyid is rejected.

   The response must contain 'Update conflict' and google_analytics must
   remain the empty string.
   """
   self.set_as_admin()
   self.app.get('/restricted/config')
   # TODO(maruel): Use beautifulsoup?
   # Use the key id just before the current one to provoke the conflict.
   outdated_key_id = config.settings().key.integer_id() - 1
   post_params = {
     'bot_death_timeout_secs': 10*60,
     'google_analytics': 'foobar',
     'keyid': str(outdated_key_id),
     'reusable_task_age_secs': 30,
     'xsrf_token': self.get_xsrf_token(),
   }
   self.assertEqual('', config.settings().google_analytics)
   post_resp = self.app.post('/restricted/config', post_params)
   self.assertIn('Update conflict', post_resp)
   self.assertEqual('', config.settings().google_analytics)
Exemple #8
0
def get_bot_version(host):
    """Retrieves the current bot version (SHA256) loaded on this server.

    The memcache is checked first; on a miss the version is computed and then
    stored in the memcache for 60 seconds.

    Returns:
      tuple(hash of the current bot version, dict of additional files). The
      dict is None when the version came from the memcache.
    """
    cache_key = 'version-' + _get_signature(host)
    cached_version = memcache.get(cache_key, namespace='bot_code')
    if cached_version:
        return cached_version, None

    # Cache miss: compute the version from the bot code and bot_config.py.
    extra_files = {'config/bot_config.py': get_bot_config().content}
    computed = bot_archive.get_swarming_bot_version(
        os.path.join(ROOT_DIR, 'swarming_bot'),
        host,
        utils.get_app_version(),
        extra_files,
        local_config.settings())
    memcache.set(cache_key, computed, namespace='bot_code', time=60)
    return computed, extra_files
Exemple #9
0
def render(name, params=None):
  """Shorthand to render a template.

  The 'google_analytics' setting is always provided to the template; entries
  in `params` are applied on top of it.
  """
  context = dict(google_analytics=config.settings().google_analytics)
  if params:
    context.update(params)
  return template.render(name, context)
 def test_list_ok(self):
   """Asserts that BotInfo is returned for the appropriate set of bots."""
   self.set_as_privileged_user()
   now = datetime.datetime(2010, 1, 2, 3, 4, 5, 6)
   now_str = unicode(now.strftime(self.DATETIME_FORMAT))
   self.mock_now(now)

   # Register a single bot so that the listing has exactly one item.
   bot_management.bot_event(
       event_type='bot_connected',
       bot_id='id1',
       external_ip='8.8.4.4',
       dimensions={'foo': ['bar'], 'id': ['id1']},
       state={'ram': 65},
       version='123456789',
       quarantined=False,
       task_id=None,
       task_name=None)

   expected_bot = {
     u'bot_id': u'id1',
     u'dimensions': [
       {u'key': u'foo', u'value': [u'bar']},
       {u'key': u'id', u'value': [u'id1']},
     ],
     u'external_ip': u'8.8.4.4',
     u'first_seen_ts': now_str,
     u'is_dead': False,
     u'last_seen_ts': now_str,
     u'quarantined': False,
     u'version': u'123456789',
   }
   expected = {
     u'items': [expected_bot],
     u'death_timeout': unicode(config.settings().bot_death_timeout_secs),
     u'now': now_str,
   }

   body = message_to_dict(swarming_rpcs.BotsRequest())
   response = self.call_api('list', body=body)
   self.assertEqual(expected, response.json)
Exemple #11
0
 def get_content_security_policy(self):
     """Returns the parent CSP extended with the 'child-src' entries we need.

     Pages at display_server_url_template are shown in iframes, so their
     origin has to be allowed, along with any extra configured URLs.
     """
     csp = super(UIHandler, self).get_content_security_policy()
     tmpl = config.settings().display_server_url_template
     if tmpl:
         if tmpl.startswith('/'):
             allowed_source = "'self'"
         else:
             # We assume the template specifies '%s' in its last path
             # component. Stripping it yields a "parent" path for the CSP;
             # whitelisting the entire display server domain would be
             # unnecessarily wide.
             allowed_source = tmpl[:tmpl.rfind('/') + 1]
         csp['child-src'].append(allowed_source)
     csp['child-src'].extend(config.settings().extra_child_src_csp_url)
     return csp
Exemple #12
0
def _find_dupe_task(now, h):
    """Finds a previously run task that is also idempotent and completed.

    Queries TaskResultSummary entities whose properties_hash equals `h`,
    ordered by key, and returns the first one that is COMPLETED without
    failure, provided it is younger than reusable_task_age_secs.

    "task_result.TaskResultSummary.created_ts > oldest" is deliberately not
    part of the query because it would require a composite index. Ordering by
    key works as well, since TaskRequest.key is equivalent to decreasing
    TaskRequest.created_ts.

    Returns:
      The matching TaskResultSummary, or None when nothing can be reused.
    """
    # TODO(maruel): Make a reverse map on successful task completion so this
    # becomes a simple ndb.get().
    summary_cls = task_result.TaskResultSummary
    query = summary_cls.query(summary_cls.properties_hash == h).order(
        summary_cls.key)
    for index, candidate in enumerate(query.iter(batch_size=1)):
        # It is possible for the query to return stale items.
        is_stale = (candidate.state != task_result.State.COMPLETED
                    or candidate.failure)
        if is_stale:
            if index == 2:
                # Indexes are very inconsistent, give up.
                return None
            continue

        # Refuse tasks older than X days. This is due to the isolate server
        # dropping files.
        # TODO(maruel): The value should be calculated from the isolate server
        # setting and be unbounded when no isolated input was used.
        max_age = datetime.timedelta(
            seconds=config.settings().reusable_task_age_secs)
        oldest = now - max_age
        if candidate.created_ts <= oldest:
            return None
        return candidate
    return None
Exemple #13
0
def get_swarming_bot_zip(host):
    """Returns a zipped file of all the files a bot needs to run.

    The cached zip is returned when both its content and its bot_config.py
    revision are present; otherwise the zip is generated and cached.

    Returns:
      A string representing the zipped file's contents.
    """
    version, additionals, _ = get_bot_version(host)
    cached = get_cached_swarming_bot_zip(version)
    cached_content, cached_bot_config_rev = cached
    # TODO(crbug.com/1087981): Compare the bot config revisions.
    # Separate deployment to be safe.
    if cached_content and cached_bot_config_rev:
        logging.debug(
            'memcached bot code %s; %d bytes with bot_config.py rev: %s',
            version, len(cached_content), cached_bot_config_rev)
        return cached_content

    # Get the start bot script from the database, if present. Pass an empty
    # file if the file isn't present.
    bot_config, bot_config_rev = get_bot_config()
    if not additionals:
        additionals = {'config/bot_config.py': bot_config.content}
    content, version = bot_archive.get_swarming_bot_zip(
        os.path.join(ROOT_DIR, 'swarming_bot'),
        host,
        utils.get_app_version(),
        additionals,
        local_config.settings())
    logging.info('generated bot code %s; %d bytes with bot_config.py rev: %s',
                 version, len(content), bot_config_rev)
    cache_swarming_bot_zip(version, content, bot_config_rev)
    return content
Exemple #14
0
    def list(self, request):
        """Provides list of known bots.

        Deleted bots will not be listed.
        """
        logging.debug('%s', request)
        now = utils.utcnow()
        query = bot_management.BotInfo.query()
        try:
            query = bot_management.filter_dimensions(query,
                                                     request.dimensions)
            # Availability filters, in the order filter_availability() takes
            # them.
            availability = [
                swarming_rpcs.to_bool(request.quarantined),
                swarming_rpcs.to_bool(request.in_maintenance),
                swarming_rpcs.to_bool(request.is_dead),
                swarming_rpcs.to_bool(request.is_busy),
                swarming_rpcs.to_bool(request.is_mp),
            ]
            query = bot_management.filter_availability(query, *availability)
        except ValueError as e:
            # Invalid filters are reported as a bad request.
            raise endpoints.BadRequestException(str(e))

        bots, cursor = datastore_utils.fetch_page(query, request.limit,
                                                  request.cursor)
        death_timeout = config.settings().bot_death_timeout_secs
        items = [message_conversion.bot_info_to_rpc(bot) for bot in bots]
        return swarming_rpcs.BotList(cursor=cursor,
                                     death_timeout=death_timeout,
                                     items=items,
                                     now=now)
Exemple #15
0
def bootstrap_dev_server_acls():
    """Adds localhost to IP whitelist and Swarming groups.

    Must only be called on a local dev server; does nothing on an auth
    replica.
    """
    assert utils.is_local_dev_server()
    if auth.is_replica():
        return

    loopback_bots = auth.bootstrap_loopback_ips()
    group_names = config.settings().auth

    # Loopback identities act both as Swarming users and as bootstrapping
    # bots.
    auth.bootstrap_group(group_names.users_group, loopback_bots,
                         'Swarming users')
    auth.bootstrap_group(group_names.bot_bootstrap_group, loopback_bots,
                         'Bot bootstrap')

    # Add a swarming admin. This identity is used in server_smoke_test.py.
    swarming_admin = auth.Identity(auth.IDENTITY_USER, '*****@*****.**')
    auth.bootstrap_group(group_names.admins_group, [swarming_admin],
                         'Swarming administrators')

    # Add an instance admin (for easier manual testing when running dev
    # server).
    instance_admin = auth.Identity(auth.IDENTITY_USER, '*****@*****.**')
    auth.bootstrap_group(auth.ADMIN_GROUP, [instance_admin],
                         'Users that can manage groups')
Exemple #16
0
    def setUp(self):
        """Prepares two scheduler configs, fakes and batch-mode settings."""
        super(ExternalSchedulerApiTestBatchMode, self).setUp()

        # Everything except 'id' is shared by the two scheduler configs.
        shared = {
            'address': u'http://localhost:1',
            'dimensions': ['key1:value1', 'key2:value2'],
            'all_dimensions': None,
            'any_dimensions': None,
            'enabled': True,
            'allow_es_fallback': True,
        }
        self.cfg_foo = pools_config.ExternalSchedulerConfig(id=u'foo',
                                                            **shared)
        self.cfg_hoe = pools_config.ExternalSchedulerConfig(id=u'hoe',
                                                            **shared)

        self.mock(external_scheduler, '_get_client', self._get_client)
        self._enqueue_orig = self.mock(utils, 'enqueue_task', self._enqueue)

        self._client = None

        # Setup the backend to handle task queues.
        environ = {
            'REMOTE_ADDR': self.source_ip,
            'SERVER_SOFTWARE': os.environ['SERVER_SOFTWARE'],
        }
        backend = handlers_backend.create_application(True)
        self.app = webtest.TestApp(backend, extra_environ=environ)

        # Serve a settings object with batched ES notifications enabled.
        self.cfg = config.settings()
        self.cfg.enable_batch_es_notifications = True
        self.mock(config, 'settings', lambda: self.cfg)
Exemple #17
0
    def setUp(self):
        """Prepares the scheduler config, fakes and non-batch settings."""
        super(ExternalSchedulerApiTest, self).setUp()
        es_kwargs = dict(
            address=u'http://localhost:1',
            id=u'foo',
            dimensions=['key1:value1', 'key2:value2'],
            all_dimensions=None,
            any_dimensions=None,
            enabled=True,
            allow_es_fallback=True,
        )
        self.es_cfg = pools_config.ExternalSchedulerConfig(**es_kwargs)

        # Make the values deterministic.
        self.mock_now(datetime.datetime(2014, 1, 2, 3, 4, 5, 6))
        self.mock(random, 'getrandbits', lambda _: 0x88)

        # Use the local fake client to external scheduler.
        self.mock(external_scheduler, '_get_client', self._get_client)
        self._client = None

        # Setup the backend to handle task queues.
        environ = {
            'REMOTE_ADDR': self.source_ip,
            'SERVER_SOFTWARE': os.environ['SERVER_SOFTWARE'],
        }
        backend = handlers_backend.create_application(True)
        self.app = webtest.TestApp(backend, extra_environ=environ)
        self._enqueue_orig = self.mock(utils, 'enqueue_task', self._enqueue)

        # Serve a settings object with batched ES notifications disabled.
        settings = config.settings()
        settings.enable_batch_es_notifications = False
        self.mock(config, 'settings', lambda: settings)
Exemple #18
0
def _check_dimension_acls(request):
    """Raises AuthorizationError if some requested dimensions are forbidden.

    Uses 'dimension_acls' field from the settings. See proto/config.proto.
    Nothing is checked when no ACL entries are configured.
    """
    dim_acls = config.settings().dimension_acls
    if not (dim_acls and dim_acls.entry):
        return  # not configured, this is fine

    ident = request.authenticated
    dims = request.properties.dimensions
    assert 'id' in dims or 'pool' in dims, dims  # see _validate_dimensions
    assert ident is not None  # see task_request.init_new_request

    # Forbid targeting individual bots for non-admins, but allow using 'id' if
    # 'pool' is used as well (so whoever can post tasks to 'pool' can target
    # an individual bot in that pool).
    bot_without_pool = 'id' in dims and 'pool' not in dims
    if bot_without_pool and not acl.is_admin():
        raise auth.AuthorizationError(
            'Only Swarming administrators can post tasks with "id" dimension '
            'without specifying a "pool" dimension.')

    for key, value in sorted(dims.iteritems()):
        if _can_use_dimension(dim_acls, ident, key, value):
            continue
        raise auth.AuthorizationError(
            'User %s is not allowed to schedule tasks with dimension "%s:%s"'
            % (ident.to_bytes(), key, value))
Exemple #19
0
    def get(self):
        """Computes lease utilization when Machine Provider support is on."""
        # Turn off the ndb in-process cache for this request.
        ndb.get_context().set_cache_policy(lambda _: False)
        if config.settings().mp.enabled:
            lease_management.compute_utilization()
            return

        logging.info('MP support is disabled')
Exemple #20
0
def render(name, params=None):
  """Shorthand to render a template.

  Template values start with the 'google_analytics' setting and are then
  overridden by whatever `params` provides.
  """
  values = {'google_analytics': config.settings().google_analytics}
  overrides = params if params else {}
  values.update(overrides)
  return template.render(name, values)
Exemple #21
0
    def list(self, request):
        """Provides list of known bots.

        Deleted bots will not be listed.
        """
        logging.debug('%s', request)
        now = utils.utcnow()
        # Disable the in-process local cache. This is important, as there can
        # be up to a thousand entities loaded in memory, and this is a pure
        # memory leak, as there's no chance this specific instance will need
        # these again, therefore this leads to 'Exceeded soft memory limit'
        # AppEngine errors.
        uncached = ndb.QueryOptions(use_cache=False)
        query = bot_management.BotInfo.query(default_options=uncached)
        try:
            query = bot_management.filter_dimensions(query,
                                                     request.dimensions)
            # Availability filters, in the order filter_availability() takes
            # them.
            availability = [
                swarming_rpcs.to_bool(request.quarantined),
                swarming_rpcs.to_bool(request.in_maintenance),
                swarming_rpcs.to_bool(request.is_dead),
                swarming_rpcs.to_bool(request.is_busy),
            ]
            query = bot_management.filter_availability(query, *availability)
        except ValueError as e:
            # Invalid filters are reported as a bad request.
            raise endpoints.BadRequestException(str(e))

        bots, cursor = datastore_utils.fetch_page(query, request.limit,
                                                  request.cursor)
        death_timeout = config.settings().bot_death_timeout_secs
        items = [message_conversion.bot_info_to_rpc(bot) for bot in bots]
        return swarming_rpcs.BotList(cursor=cursor,
                                     death_timeout=death_timeout,
                                     items=items,
                                     now=now)
Exemple #22
0
  def test_api_bots(self):
    """Checks the client bots API: plain listing, limit and cursor paging."""
    self.set_as_privileged_user()
    now = datetime.datetime(2010, 1, 2, 3, 4, 5, 6)
    now_str = unicode(now.strftime(utils.DATETIME_FORMAT))
    self.mock_now(now)

    def connect_bot(bot_id):
      # Records a 'bot_connected' event for the given bot id.
      bot_management.bot_event(
          event_type='bot_connected',
          bot_id=bot_id,
          external_ip='8.8.4.4',
          dimensions={'foo': ['bar'], 'id': [bot_id]},
          state={'ram': 65},
          version='123456789',
          quarantined=False,
          task_id=None,
          task_name=None)

    def fetch_bots(query_string=''):
      # Issues the GET, requires HTTP 200 and returns the decoded JSON.
      url = '/swarming/api/v1/client/bots' + query_string
      return self.app.get(url, status=200).json

    connect_bot('id1')

    actual = fetch_bots()
    bot_entry = {
      u'dimensions': {u'foo': [u'bar'], u'id': [u'id1']},
      u'external_ip': u'8.8.4.4',
      u'first_seen_ts': now_str,
      u'id': u'id1',
      u'is_dead': False,
      u'last_seen_ts': now_str,
      u'quarantined': False,
      u'state': {u'ram': 65},
      u'task_id': None,
      u'task_name': None,
      u'version': u'123456789',
    }
    expected = {
      u'items': [bot_entry],
      u'cursor': None,
      u'death_timeout': config.settings().bot_death_timeout_secs,
      u'limit': 1000,
      u'now': now_str,
    }
    self.assertEqual(expected, actual)

    # Test with limit.
    actual = fetch_bots('?limit=1')
    expected['limit'] = 1
    self.assertEqual(expected, actual)

    # With a second bot, a limit of 1 must hand back a cursor.
    connect_bot('id2')
    actual = fetch_bots('?limit=1')
    expected['cursor'] = actual['cursor']
    self.assertTrue(actual['cursor'])
    self.assertEqual(expected, actual)

    # Test with cursor: the next page holds the second bot and no cursor.
    actual = fetch_bots('?limit=1&cursor=%s' % actual['cursor'])
    expected['cursor'] = None
    bot_entry['dimensions']['id'] = [u'id2']
    bot_entry['id'] = u'id2'
    self.assertEqual(expected, actual)
Exemple #23
0
  def get(self):
    """Renders the restricted bots list page.

    Reads the 'limit', 'cursor' and 'sort_by' query parameters, aborts with
    HTTP 400 when 'sort_by' is not in ACCEPTABLE_BOTS_SORTS, and writes the
    rendered 'swarming/restricted_botslist.html' template to the response.
    """
    limit = int(self.request.get('limit', 100))
    cursor = datastore_query.Cursor(urlsafe=self.request.get('cursor'))
    sort_by = self.request.get('sort_by', '__key__')
    if sort_by not in self.ACCEPTABLE_BOTS_SORTS:
      self.abort(400, 'Invalid sort_by query parameter')

    # A leading '-' requests a descending sort on the remaining property name.
    descending = sort_by[0] == '-'
    order = datastore_query.PropertyOrder(
        sort_by[1:] if descending else sort_by,
        (datastore_query.PropertyOrder.DESCENDING if descending
         else datastore_query.PropertyOrder.ASCENDING))

    now = utils.utcnow()
    death_timeout = datetime.timedelta(
        seconds=config.settings().bot_death_timeout_secs)
    cutoff = now - death_timeout

    # The queries are issued asynchronously here; their results are collected
    # further below.
    bot_info = bot_management.BotInfo
    busy_future = bot_info.query(bot_info.is_busy == True).count_async()
    dead_future = bot_info.query(bot_info.last_seen_ts < cutoff).count_async()
    quarantined_future = bot_info.query(
        bot_info.quarantined == True).count_async()
    total_future = bot_info.query().count_async()
    page_future = bot_info.query().order(order).fetch_page_async(
        limit, start_cursor=cursor)

    # TODO(maruel): self.request.host_url should be the default AppEngine url
    # version and not the current one. It is only an issue when
    # version-dot-appid.appspot.com urls are used to access this page.
    version = bot_code.get_bot_version(self.request.host_url)
    bots, cursor, more = page_future.get_result()
    # Prefetch the tasks. We don't actually use the value here, it'll be
    # implicitly used by ndb local's cache when refetched by the html template.
    ndb.get_multi([task for task in (b.task for b in bots) if task])
    num_busy = busy_future.get_result()
    num_dead = dead_future.get_result()
    num_quarantined = quarantined_future.get_result()
    num_total = total_future.get_result()

    # Only expose a cursor when there are more results to page through.
    next_cursor = cursor.urlsafe() if cursor and more else ''
    params = {
      'bots': bots,
      'current_version': version,
      'cursor': next_cursor,
      'is_admin': acl.is_admin(),
      'is_privileged_user': acl.is_privileged_user(),
      'limit': limit,
      'now': now,
      'num_bots_alive': num_total - num_dead,
      'num_bots_busy': num_busy,
      'num_bots_dead': num_dead,
      'num_bots_quarantined': num_quarantined,
      'sort_by': sort_by,
      'sort_options': self.SORT_OPTIONS,
      'xsrf_token': self.generate_xsrf_token(),
    }
    html = template.render('swarming/restricted_botslist.html', params)
    self.response.write(html)
Exemple #24
0
    def get(self):
        """Writes the restricted bots list page to the response.

        Query parameters: 'limit' (default 100), 'cursor' and 'sort_by'
        (default '__key__'). A 'sort_by' value outside ACCEPTABLE_BOTS_SORTS
        aborts the request with HTTP 400.
        """
        limit = int(self.request.get('limit', 100))
        cursor = datastore_query.Cursor(urlsafe=self.request.get('cursor'))
        sort_by = self.request.get('sort_by', '__key__')
        if sort_by not in self.ACCEPTABLE_BOTS_SORTS:
            self.abort(400, 'Invalid sort_by query parameter')

        # '-name' sorts by 'name' descending; anything else sorts ascending.
        if sort_by[0] == '-':
            sort_property = sort_by[1:]
            direction = datastore_query.PropertyOrder.DESCENDING
        else:
            sort_property = sort_by
            direction = datastore_query.PropertyOrder.ASCENDING
        order = datastore_query.PropertyOrder(sort_property, direction)

        now = utils.utcnow()
        cutoff = now - datetime.timedelta(
            seconds=config.settings().bot_death_timeout_secs)

        # Start every count and the page fetch asynchronously; the results
        # are read back after the bot version lookup.
        model = bot_management.BotInfo
        busy_count = model.query(model.is_busy == True).count_async()
        dead_count = model.query(model.last_seen_ts < cutoff).count_async()
        quarantined_count = model.query(
            model.quarantined == True).count_async()
        total_count = model.query().count_async()
        page = model.query().order(order).fetch_page_async(
            limit, start_cursor=cursor)

        # TODO(maruel): self.request.host_url should be the default AppEngine
        # url version and not the current one. It is only an issue when
        # version-dot-appid.appspot.com urls are used to access this page.
        version = bot_code.get_bot_version(self.request.host_url)
        bots, cursor, more = page.get_result()
        # Prefetch the tasks. We don't actually use the value here, it'll be
        # implicitly used by ndb local's cache when refetched by the html
        # template.
        task_keys = [key for key in (b.task for b in bots) if key]
        ndb.get_multi(task_keys)
        num_bots_busy = busy_count.get_result()
        num_bots_dead = dead_count.get_result()
        num_bots_quarantined = quarantined_count.get_result()
        num_bots_total = total_count.get_result()

        if cursor and more:
            cursor_param = cursor.urlsafe()
        else:
            cursor_param = ''
        params = {
            'bots': bots,
            'current_version': version,
            'cursor': cursor_param,
            'is_admin': acl.is_admin(),
            'is_privileged_user': acl.is_privileged_user(),
            'limit': limit,
            'now': now,
            'num_bots_alive': num_bots_total - num_bots_dead,
            'num_bots_busy': num_bots_busy,
            'num_bots_dead': num_bots_dead,
            'num_bots_quarantined': num_bots_quarantined,
            'sort_by': sort_by,
            'sort_options': self.SORT_OPTIONS,
            'xsrf_token': self.generate_xsrf_token(),
        }
        page_html = template.render('swarming/restricted_botslist.html',
                                    params)
        self.response.write(page_html)
Exemple #25
0
 def common(self, note):
     """Writes the restricted config page, built from fresh settings.

     Args:
       note: value passed to the template as 'note'.
     """
     template_values = dict(
         cfg=config.settings(fresh=True),
         note=note,
         path=self.request.path,
         xsrf_token=self.generate_xsrf_token(),
     )
     page = template.render("swarming/restricted_config.html",
                            template_values)
     self.response.write(page)
Exemple #26
0
  def test_task_idempotent(self):
    """A task requested just inside the reuse window is deduped."""
    self.mock(random, 'getrandbits', lambda _: 0x88)
    # First task is idempotent.
    first_task_id = self._task_ran_successfully()

    # One second before the reuse window closes, the second task must be
    # deduped against the first task.
    offset_secs = config.settings().reusable_task_age_secs - 1
    dedupe_ts = self.mock_now(self.now, offset_secs)
    self._task_deduped(dedupe_ts, first_task_id)
  def test_task_idempotent(self):
    """Checks that a second identical task reuses the first task's result."""
    self.mock(random, 'getrandbits', lambda _: 0x88)

    # Run the first, idempotent task for real.
    original_id = self._task_ran_successfully()

    # Advance the clock to one second short of reusable_task_age_secs; the
    # second task is then deduped against the first one.
    reuse_window = config.settings().reusable_task_age_secs
    second_ts = self.mock_now(self.now, reuse_window - 1)
    self._task_deduped(second_ts, original_id)
Exemple #28
0
 def common(self, note):
   """Renders 'swarming/restricted_config.html' and writes it out.

   Args:
     note: value exposed to the template under the 'note' key.
   """
   values = dict(
       cfg=config.settings(fresh=True),
       note=note,
       path=self.request.path,
       xsrf_token=self.generate_xsrf_token())
   rendered = template.render('swarming/restricted_config.html', values)
   self.response.write(rendered)
 def get_content_security_policy(self):
     """Returns the parent CSP extended with the 'frame-src' entries we need.

     Pages at display_server_url_template are shown in iframes, so they must
     be allowed in the CSP. "'self'" is always allowed.
     """
     csp = super(UIHandler, self).get_content_security_policy()
     csp['frame-src'].append("'self'")
     tmpl = config.settings().display_server_url_template
     if tmpl and not tmpl.startswith('/'):
         # We assume the template specifies '%s' in its last path component.
         # Stripping it yields a "parent" path for the CSP; whitelisting the
         # entire display server domain would be unnecessarily wide.
         parent_path = tmpl[:tmpl.rfind('/') + 1]
         csp['frame-src'].append(parent_path)
     # Note that frame-src was once child-src, which was deprecated and
     # support was dropped by some browsers. See
     # https://bugs.chromium.org/p/chromium/issues/detail?id=839909
     extra_sources = config.settings().extra_child_src_csp_url
     csp['frame-src'].extend(extra_sources)
     return csp
Exemple #30
0
def cron_update_bot_info():
    """Refreshes BotInfo.composite for dead bots.

    Iterates over every BotInfo whose last_seen_ts is at or before the death
    cutoff. For each one whose composite does not already say DEAD (or still
    says ALIVE), the bot is unregistered from the task queues and the entity
    is stored again inside a transaction, which recomputes composite.

    At most 5 transactions are kept in flight at a time. Transactions that
    fail with CommitError are counted and logged but do not abort the run.

    Returns:
      Number of bots that were updated.
    """
    dt = datetime.timedelta(seconds=config.settings().bot_death_timeout_secs)
    cutoff = utils.utcnow() - dt

    @ndb.tasklet
    def run(bot_key):
        # Re-read the entity and re-check the conditions, since the copy seen
        # by the query below may be stale by the time this runs.
        bot = yield bot_key.get_async()
        if (bot and bot.last_seen_ts <= cutoff
                and (BotInfo.ALIVE in bot.composite
                     or BotInfo.DEAD not in bot.composite)):
            # Updating it recomputes composite.
            # TODO(maruel): BotEvent.
            yield bot.put_async()
            logging.info('DEAD: %s', bot.id)
            raise ndb.Return(1)
        raise ndb.Return(0)

    # The assumption here is that a cron job can churn through all the entities
    # fast enough. The number of dead bot is expected to be <10k. In practice the
    # average runtime is around 8 seconds.
    dead = 0
    seen = 0
    failed = 0
    try:
        futures = []
        for b in BotInfo.query(BotInfo.last_seen_ts <= cutoff):
            seen += 1
            if BotInfo.ALIVE in b.composite or BotInfo.DEAD not in b.composite:
                k = b.key
                # Unregister the bot from task queues since it can't reap anything.
                task_queues.cleanup_after_bot(k.parent())
                # Retry more often than the default 1. We do not want to throw too much
                # in the logs and there should be plenty of time to do the retries.
                #
                # The key is bound as a default argument on purpose: a plain
                # `lambda: run(k)` would look `k` up when the callback is
                # invoked (possibly later, or again on a retry), by which time
                # the loop may have rebound `k` to another bot's key.
                f = datastore_utils.transaction_async(
                    lambda bot_key=k: run(bot_key), retries=5)
                futures.append(f)
                if len(futures) >= 5:
                    # Throttle: wait for at least one transaction to finish,
                    # then harvest every completed one. Iterate backwards so
                    # that pop(i) does not shift the indexes still to visit.
                    ndb.Future.wait_any(futures)
                    for i in xrange(len(futures) - 1, -1, -1):
                        if futures[i].done():
                            try:
                                dead += futures.pop(i).get_result()
                            except datastore_utils.CommitError:
                                logging.warning('Failed to commit a Tx')
                                failed += 1
        # Drain whatever is still in flight.
        for f in futures:
            try:
                dead += f.get_result()
            except datastore_utils.CommitError:
                logging.warning('Failed to commit a Tx')
                failed += 1
    finally:
        logging.debug('Seen %d bots, updated %d bots, failed %d tx', seen,
                      dead, failed)
    return dead
Exemple #31
0
 def common(self, note):
     """Writes the restricted configuration page to the response.

     Args:
       note: value exposed to the template under the 'note' key.
     """
     template_values = dict(
         cfg=config.settings(fresh=True),
         note=note,
         path=self.request.path,
         xsrf_token=self.generate_xsrf_token())
     page = template.render('swarming/restricted_config.html',
                            template_values)
     self.response.write(page)
Exemple #32
0
def has_capacity(dimensions):
    """Tells whether a bot is likely able to service these task dimensions.

    The task queues' fast path is consulted first. When it has no answer, the
    datastore is searched for a matching BotInfo or, failing that, a recent
    BotEvent.

    Args:
      dimensions: task request dimensions, handed as-is to task_queues.

    Returns:
      The fast path answer when it is not None; otherwise True if any
      datastore query found a match, else False.
    """
    assert not ndb.in_transaction()
    # Fast path first.
    cached = task_queues.probably_has_capacity(dimensions)
    if cached is not None:
        return cached

    # A positive answer is stored in the 'quick cache' to improve performance.
    # It is kept for as long as bots are considered still alive without a ping.
    # Useful if there's a single bot in the fleet for these dimensions and it
    # takes a long time to reboot, e.g. Android with slow initialization and
    # some baremetal bots.
    ttl_secs = config.settings().bot_death_timeout_secs

    @ndb.tasklet
    def run_query(flat):
        # Datastore lookup: slower, and eventually consistent.
        bot_query = BotInfo.query()
        for dim in flat:
            bot_query = bot_query.filter(BotInfo.dimensions_flat == dim)
        found = yield bot_query.count_async(limit=1)
        if found:
            logging.info('Found capacity via BotInfo: %s', flat)
            raise ndb.Return(True)

        # Look a bit harder: a BotEvent newer than the cutoff means a bot with
        # these dimensions existed recently.
        oldest = utils.utcnow() - datetime.timedelta(seconds=ttl_secs)
        event_query = BotEvent.query(BotEvent.ts > oldest)
        for dim in flat:
            event_query = event_query.filter(BotEvent.dimensions_flat == dim)
        found = yield event_query.count_async(limit=1)
        if not found:
            raise ndb.Return(False)
        logging.info('Found capacity via BotEvent: %s', flat)
        raise ndb.Return(True)

    # Start one lookup per flattened dimension set, then wait for all of them.
    pending = []
    for flat_dims in task_queues.expand_dimensions_to_flats(dimensions):
        pending.append(run_query(flat_dims))
    ndb.tasklets.Future.wait_all(pending)

    for future in pending:
        if future.get_result():
            task_queues.set_has_capacity(dimensions, ttl_secs)
            return True

    logging.warning('HAS NO CAPACITY: %s', dimensions)
    return False
Exemple #33
0
def exponential_backoff(attempt_num):
  """Returns an exponential backoff value in seconds.

  Args:
    attempt_num: non-negative attempt counter; values above 10 are treated
      as 10.

  Returns:
    1.0 with probability _PROBABILITY_OF_QUICK_COMEBACK, otherwise
    1.5 ** (attempt + 1) capped at the configured max_bot_sleep_time (60s when
    that setting is falsy).
  """
  assert attempt_num >= 0
  quick_comeback = random.random() < _PROBABILITY_OF_QUICK_COMEBACK
  if quick_comeback:
    # Once in a while, ask the bot to come back quickly.
    return 1.0

  # Honor the user-provided maximum when there is one, else default to 60s.
  ceiling = config.settings().max_bot_sleep_time or 60.
  exponent = min(attempt_num, 10) + 1
  backoff = math.pow(1.5, exponent)
  return min(ceiling, backoff)
 def test_list_ok(self):
     """Asserts that BotInfo is returned for the appropriate set of bots."""
     self.set_as_privileged_user()
     frozen_now = datetime.datetime(2010, 1, 2, 3, 4, 5, 6)
     frozen_now_str = unicode(frozen_now.strftime(self.DATETIME_FORMAT))
     self.mock_now(frozen_now)

     # Register a single bot through a 'bot_connected' event.
     bot_management.bot_event(
         event_type='bot_connected', bot_id='id1', external_ip='8.8.4.4',
         dimensions={'foo': ['bar'], 'id': ['id1']}, state={'ram': 65},
         version='123456789', quarantined=False, task_id=None,
         task_name=None)

     # What the API is expected to report for that bot.
     expected_bot = {
         u'bot_id': u'id1',
         u'dimensions': [
             {u'key': u'foo', u'value': [u'bar']},
             {u'key': u'id', u'value': [u'id1']},
         ],
         u'external_ip': u'8.8.4.4',
         u'first_seen_ts': frozen_now_str,
         u'is_dead': False,
         u'last_seen_ts': frozen_now_str,
         u'quarantined': False,
         u'version': u'123456789',
     }
     death_timeout = unicode(config.settings().bot_death_timeout_secs)
     expected = {
         u'items': [expected_bot],
         u'death_timeout': death_timeout,
         u'now': frozen_now_str,
     }

     request = swarming_rpcs.BotsRequest()
     response = self.call_api('list', body=message_to_dict(request))
     self.assertEqual(expected, response.json)
  def test_task_idempotent_three(self):
    """A third request is enqueued once the first result is too old."""
    # Make random.getrandbits deterministic for the duration of the test.
    self.mock(random, 'getrandbits', lambda _bits: 0x88)

    # Run the first, idempotent, task to completion.
    first_id = self._task_ran_successfully()

    # One second short of the reuse limit: the second task is deduped against
    # the first one.
    dedup_ts = self.mock_now(
        self.now, config.settings().reusable_task_age_secs - 1)
    self._task_deduped(dedup_ts, first_id)

    # At the reuse limit the first task is too old, and the second one is not
    # dedupable, so the third task has to be scheduled.
    self.mock_now(self.now, config.settings().reusable_task_age_secs)
    props = dict(dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True)
    data = _gen_request(name='yay', user='******', properties=props)
    request = task_request.make_request(data, True)
    task_scheduler.schedule_request(request)

    # A queue number means the task was enqueued for execution.
    queued = task_to_run.TaskToRun.query().get()
    self.assertNotEqual(None, queued.queue_number)
Exemple #36
0
 def _calc_composite(self):
     """Computes the list of state markers stored in BotInfo.composite.

     Returns:
       A 4-item list holding, in order, the maintenance, liveness, quarantine
       and busy markers of this bot.
     """
     death_timeout_secs = config.settings().bot_death_timeout_secs
     silent_secs = (utils.utcnow() - self.last_seen_ts).total_seconds()

     if self.maintenance_msg:
         maintenance = self.IN_MAINTENANCE
     else:
         maintenance = self.NOT_IN_MAINTENANCE

     # Dead as soon as the silence reaches the timeout.
     if silent_secs >= death_timeout_secs:
         liveness = self.DEAD
     else:
         liveness = self.ALIVE

     if self.quarantined:
         health = self.QUARANTINED
     else:
         health = self.HEALTHY

     if self.task_id:
         activity = self.BUSY
     else:
         activity = self.IDLE

     return [maintenance, liveness, health, activity]
Exemple #37
0
  def test_task_idempotent_three(self):
    """A third request is enqueued once the first result is too old."""
    # Make random.getrandbits deterministic for the duration of the test.
    self.mock(random, 'getrandbits', lambda _bits: 0x88)

    # Run the first, idempotent, task to completion.
    first_id = self._task_ran_successfully()

    # One second short of the reuse limit: the second task is deduped against
    # the first one.
    dedup_ts = self.mock_now(
        self.now, config.settings().reusable_task_age_secs - 1)
    self._task_deduped(dedup_ts, first_id)

    # At the reuse limit the first task is too old, and the second one is not
    # dedupable, so the third task has to be scheduled.
    self.mock_now(self.now, config.settings().reusable_task_age_secs)
    props = dict(dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True)
    data = _gen_request(name='yay', user='******', properties=props)
    request = task_request.make_request(data, True)
    task_scheduler.schedule_request(request)

    # A queue number means the task was enqueued for execution.
    queued = task_to_run.TaskToRun.query().get()
    self.assertNotEqual(None, queued.queue_number)
 def list(self, request):
   """Provides list of bots.

   Reads one page of BotInfo entities ordered by key, starting at
   request.cursor and holding up to request.batch_size items.
   """
   now = utils.utcnow()
   start = datastore_query.Cursor(urlsafe=request.cursor)
   bot_cls = bot_management.BotInfo
   query = bot_cls.query().order(bot_cls.key)
   bots, next_cursor, more = query.fetch_page(
       request.batch_size, start_cursor=start)

   # Only hand a cursor back when there is something left to fetch.
   if next_cursor and more:
     next_token = next_cursor.urlsafe()
   else:
     next_token = None
   death_timeout = config.settings().bot_death_timeout_secs
   items = []
   for bot in bots:
     bot_dict = bot.to_dict_with_now(now)
     items.append(message_conversion.bot_info_from_dict(bot_dict))
   return swarming_rpcs.BotList(
       cursor=next_token,
       death_timeout=death_timeout,
       items=items,
       now=now)
Exemple #39
0
def filter_availability(q, quarantined, is_dead, now):
    """Filters a ndb.Query for BotInfo based on quarantined/is_dead.

    Args:
      q: query to refine.
      quarantined: when not None, keep only bots whose BotInfo.quarantined
        equals this value.
      is_dead: when truthy, keep only bots last seen at or before the death
        cutoff; when falsy but not None, keep only bots seen after it; when
        None, do not filter on liveness.
      now: reference time the death cutoff is computed from.

    Returns:
      The refined query.
    """
    if quarantined is not None:
        q = q.filter(BotInfo.quarantined == quarantined)

    if is_dead is None:
        # No liveness filtering requested, so the timeout is not needed.
        return q

    dt = datetime.timedelta(seconds=config.settings().bot_death_timeout_secs)
    cutoff = now - dt
    if is_dead:
        # Inclusive bound: a bot is dead once the time elapsed since it was
        # last seen is >= the timeout, i.e. last_seen_ts <= cutoff. A strict
        # '<' would leave a bot sitting exactly on the cutoff in neither the
        # dead nor the alive set.
        q = q.filter(BotInfo.last_seen_ts <= cutoff)
    else:
        q = q.filter(BotInfo.last_seen_ts > cutoff)
    return q
Exemple #40
0
  def get(self, page):
    """Writes the public index template matching `page` to the response.

    Args:
      page: page name; a falsy value selects 'swarming'.

    Aborts with a 404 when the template does not exist.
    """
    page = page or 'swarming'
    template_values = {'client_id': config.settings().ui_client_id}
    template_path = 'swarming/public_%s_index.html' % page
    try:
      self.response.write(template.render(template_path, template_values))
    except template.TemplateNotFound:
      self.abort(404, 'Page not found.')
Exemple #41
0
 def post(self):
     """Stores the submitted settings, then renders the config page again.

     The update is refused with an 'Update conflict' note when the submitted
     keyid differs from the integer id of the stored config's key.
     """
     # Flatten the MultiDict into a plain dict, leaving out the two form
     # fields that are not settings.
     form = self.request.params
     params = {}
     for name in form:
         if name in ("keyid", "xsrf_token"):
             continue
         params[name] = form.getone(name)

     # These two settings are integers.
     for field in ("bot_death_timeout_secs", "reusable_task_age_secs"):
         params[field] = int(params[field])

     cfg = config.settings(fresh=True)
     submitted_id = int(self.request.get("keyid", "0"))
     stored_id = cfg.key.integer_id()
     if stored_id != submitted_id:
         self.common("Update conflict %s != %s" % (stored_id, submitted_id))
         return

     cfg.populate(**params)
     cfg.store()
     self.common("Settings updated")
Exemple #42
0
    def get(self):
        """Legacy bot listing with optional 'quarantined'/'is_dead' filter.

        Query parameters read: 'limit' (default 1000), 'filter' and 'cursor'.
        Sends back a JSON dict with 'cursor', 'death_timeout', 'items',
        'limit' and 'now'.
        """
        logging.error('Unexpected old client')
        now = utils.utcnow()
        limit = int(self.request.get('limit', 1000))
        filter_by = self.request.get('filter')
        if filter_by and filter_by not in self.ACCEPTABLE_FILTERS:
            self.abort_with_error(400, error='Invalid filter query parameter')

        bot_cls = bot_management.BotInfo
        query = bot_cls.query()

        if filter_by == 'quarantined':
            query = query.order(bot_cls.key)
            query = query.filter(bot_cls.quarantined == True)

            def recheck(bot):
                return bot.quarantined
        elif filter_by == 'is_dead':
            # The first sort key must be the same as used in the filter,
            # otherwise datastore raises BadRequestError.
            deadline = now - datetime.timedelta(
                seconds=config.settings().bot_death_timeout_secs)
            query = query.order(bot_cls.last_seen_ts)
            query = query.filter(bot_cls.last_seen_ts < deadline)

            def recheck(bot):
                return bot.last_seen_ts < deadline
        elif not filter_by:
            query = query.order(bot_cls.key)

            def recheck(_bot):
                return True
        else:
            raise AssertionError('Impossible')

        start = datastore_query.Cursor(urlsafe=self.request.get('cursor'))
        bots, next_cursor, more = query.fetch_page(limit, start_cursor=start)

        # Only hand a cursor back when there is something left to fetch.
        if next_cursor and more:
            next_token = next_cursor.urlsafe()
        else:
            next_token = None
        death_timeout = config.settings().bot_death_timeout_secs
        # Entities are checked again in memory against the selected filter.
        items = []
        for bot in bots:
            if recheck(bot):
                items.append(bot.to_dict_with_now(now))

        data = {
            'cursor': next_token,
            'death_timeout': death_timeout,
            'items': items,
            'limit': limit,
            'now': now,
        }
        self.send_response(utils.to_json_encodable(data))
Exemple #43
0
  def get(self):
    """Legacy bot listing with optional 'quarantined'/'is_dead' filter.

    Query parameters read: 'limit' (default 1000), 'filter' and 'cursor'.
    Sends back a JSON dict with 'cursor', 'death_timeout', 'items', 'limit'
    and 'now'.
    """
    logging.error('Unexpected old client')
    now = utils.utcnow()
    limit = int(self.request.get('limit', 1000))
    filter_by = self.request.get('filter')
    if filter_by and filter_by not in self.ACCEPTABLE_FILTERS:
      self.abort_with_error(400, error='Invalid filter query parameter')

    bot_cls = bot_management.BotInfo
    query = bot_cls.query()

    if filter_by == 'quarantined':
      query = query.order(bot_cls.key)
      query = query.filter(bot_cls.quarantined == True)

      def recheck(bot):
        return bot.quarantined
    elif filter_by == 'is_dead':
      # The first sort key must be the same as used in the filter, otherwise
      # datastore raises BadRequestError.
      deadline = now - datetime.timedelta(
          seconds=config.settings().bot_death_timeout_secs)
      query = query.order(bot_cls.last_seen_ts)
      query = query.filter(bot_cls.last_seen_ts < deadline)

      def recheck(bot):
        return bot.last_seen_ts < deadline
    elif not filter_by:
      query = query.order(bot_cls.key)

      def recheck(_bot):
        return True
    else:
      raise AssertionError('Impossible')

    start = datastore_query.Cursor(urlsafe=self.request.get('cursor'))
    bots, next_cursor, more = query.fetch_page(limit, start_cursor=start)

    # Only hand a cursor back when there is something left to fetch.
    if next_cursor and more:
      next_token = next_cursor.urlsafe()
    else:
      next_token = None
    death_timeout = config.settings().bot_death_timeout_secs
    # Entities are checked again in memory against the selected filter.
    items = []
    for bot in bots:
      if recheck(bot):
        items.append(bot.to_dict_with_now(now))

    data = {
      'cursor': next_token,
      'death_timeout': death_timeout,
      'items': items,
      'limit': limit,
      'now': now,
    }
    self.send_response(utils.to_json_encodable(data))
Exemple #44
0
  def list(self, request):
    """Provides list of known bots.

    Deleted bots will not be listed.

    Reads one page of BotInfo entities ordered by key, driven by
    request.limit and request.cursor.
    """
    logging.info('%s', request)
    now = utils.utcnow()
    bot_cls = bot_management.BotInfo
    query = bot_cls.query().order(bot_cls.key)
    bots, cursor = datastore_utils.fetch_page(
        query, request.limit, request.cursor)

    death_timeout = config.settings().bot_death_timeout_secs
    items = []
    for bot in bots:
      items.append(message_conversion.bot_info_to_rpc(bot, now))
    return swarming_rpcs.BotList(
        cursor=cursor, death_timeout=death_timeout, items=items, now=now)
Exemple #45
0
  def test_task_idempotent_old(self):
    """A request made once the first result is too old is enqueued."""
    # Make random.getrandbits deterministic for the duration of the test.
    self.mock(random, 'getrandbits', lambda _bits: 0x88)

    # Run the first, idempotent, task to completion.
    self._task_ran_successfully()

    # At the reuse limit the first task is too old to be reused, so the second
    # one has to be scheduled.
    self.mock_now(self.now, config.settings().reusable_task_age_secs)
    props = dict(dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True)
    data = _gen_request_data(name='yay', user='******', properties=props)
    request = task_request.make_request(data)
    task_scheduler.schedule_request(request)

    # A queue number means the task was enqueued for execution.
    queued = task_to_run.TaskToRun.query().get()
    self.assertNotEqual(None, queued.queue_number)
Exemple #46
0
 def post(self):
   """Stores the submitted settings, then renders the config page again.

   The update is refused with an 'Update conflict' note when the submitted
   keyid differs from the integer id of the stored config's key.
   """
   # Flatten the MultiDict into a plain dict, leaving out the two form fields
   # that are not settings.
   skipped = ('keyid', 'xsrf_token')
   form = self.request.params
   params = {}
   for name in form:
     if name not in skipped:
       params[name] = form.getone(name)

   # These two settings are integers.
   for field in ('bot_death_timeout_secs', 'reusable_task_age_secs'):
     params[field] = int(params[field])

   cfg = config.settings(fresh=True)
   submitted_id = int(self.request.get('keyid', '0'))
   stored_id = cfg.key.integer_id()
   if stored_id != submitted_id:
     self.common('Update conflict %s != %s' % (stored_id, submitted_id))
     return

   cfg.populate(**params)
   cfg.store()
   self.common('Settings updated')
  def list(self, request):
    """Provides list of known bots.

    Deleted bots will not be listed.

    Each entry of request.dimensions must look like 'key:value', with a
    non-empty key and value and no leading or trailing whitespace on either;
    every entry becomes an equality filter on BotInfo.dimensions_flat.

    Raises:
      endpoints.BadRequestException: a dimension entry is malformed.
    """
    logging.info('%s', request)
    now = utils.utcnow()
    bot_cls = bot_management.BotInfo
    query = bot_cls.query().order(bot_cls.key)
    for dim in request.dimensions:
      # Split on the first ':' only; the value may itself contain colons.
      key, separator, value = dim.partition(':')
      if not separator:
        raise endpoints.BadRequestException('Invalid dimensions')
      for part in (key, value):
        if part.strip() != part or not part:
          raise endpoints.BadRequestException('Invalid dimensions')
      query = query.filter(bot_cls.dimensions_flat == dim)

    bots, cursor = datastore_utils.fetch_page(
        query, request.limit, request.cursor)
    death_timeout = config.settings().bot_death_timeout_secs
    items = []
    for bot in bots:
      items.append(message_conversion.bot_info_to_rpc(bot, now))
    return swarming_rpcs.BotList(
        cursor=cursor, death_timeout=death_timeout, items=items, now=now)
    def test_api_bots(self):
        """Exercises GET /swarming/api/v1/client/bots.

        Covers, in order: the default listing, the 'limit' parameter, paging
        with 'limit' + 'cursor', and the 'quarantined' and 'is_dead' filters.
        The `expected` dict is mutated step by step, so the order of the
        sections below matters.
        """
        self.set_as_privileged_user()
        self.mock_now(datetime.datetime(2010, 1, 2, 3, 4, 5, 6))
        # Evaluated lazily so that it follows later mock_now() calls.
        now_str = lambda: unicode(utils.utcnow().strftime(utils.DATETIME_FORMAT))
        bot_management.bot_event(
            event_type="bot_connected",
            bot_id="id1",
            external_ip="8.8.4.4",
            dimensions={"foo": ["bar"], "id": ["id1"]},
            state={"ram": 65},
            version="123456789",
            quarantined=False,
            task_id=None,
            task_name=None,
        )
        bot1_dict = {
            u"dimensions": {u"foo": [u"bar"], u"id": [u"id1"]},
            u"external_ip": u"8.8.4.4",
            u"first_seen_ts": now_str(),
            u"id": u"id1",
            u"is_dead": False,
            u"last_seen_ts": now_str(),
            u"quarantined": False,
            u"state": {u"ram": 65},
            u"task_id": None,
            u"task_name": None,
            u"version": u"123456789",
        }

        # Default listing: no limit, no filter.
        actual = self.app.get("/swarming/api/v1/client/bots", status=200).json
        expected = {
            u"items": [bot1_dict],
            u"cursor": None,
            u"death_timeout": config.settings().bot_death_timeout_secs,
            u"limit": 1000,
            u"now": now_str(),
        }
        self.assertEqual(expected, actual)

        # Test with limit.
        actual = self.app.get("/swarming/api/v1/client/bots?limit=1", status=200).json
        expected["limit"] = 1
        self.assertEqual(expected, actual)

        # Advance time to make bot1 dead to test filtering for dead bots.
        self.mock_now(datetime.datetime(2011, 1, 2, 3, 4, 5, 6))
        # bot1_dict is the very object held in expected["items"], so this
        # updates the expectation as well.
        bot1_dict["is_dead"] = True
        expected["now"] = now_str()

        # Use quarantined bot to check filtering by 'quarantined' flag.
        bot_management.bot_event(
            event_type="bot_connected",
            bot_id="id2",
            external_ip="8.8.4.4",
            dimensions={"foo": ["bar"], "id": ["id2"]},
            state={"ram": 65},
            version="123456789",
            quarantined=True,
            task_id=None,
            task_name=None,
        )
        bot2_dict = {
            u"dimensions": {u"foo": [u"bar"], u"id": [u"id2"]},
            u"external_ip": u"8.8.4.4",
            u"first_seen_ts": now_str(),
            u"id": u"id2",
            u"is_dead": False,
            u"last_seen_ts": now_str(),
            u"quarantined": True,
            u"state": {u"ram": 65},
            u"task_id": None,
            u"task_name": None,
            u"version": u"123456789",
        }

        # Test limit + cursor: start the query.
        actual = self.app.get("/swarming/api/v1/client/bots?limit=1", status=200).json
        # The cursor value is opaque; copy it over and only check it is set.
        expected["cursor"] = actual["cursor"]
        expected["items"] = [bot1_dict]
        self.assertTrue(actual["cursor"])
        self.assertEqual(expected, actual)

        # Test limit + cursor: continue the query.
        actual = self.app.get("/swarming/api/v1/client/bots?limit=1&cursor=%s" % actual["cursor"], status=200).json
        expected["cursor"] = None
        expected["items"] = [bot2_dict]
        self.assertEqual(expected, actual)

        # Filtering by 'quarantined'.
        actual = self.app.get("/swarming/api/v1/client/bots?filter=quarantined", status=200).json
        expected["limit"] = 1000
        expected["cursor"] = None
        expected["items"] = [bot2_dict]
        self.assertEqual(expected, actual)

        # Filtering by 'is_dead'.
        actual = self.app.get("/swarming/api/v1/client/bots?filter=is_dead", status=200).json
        expected["limit"] = 1000
        expected["cursor"] = None
        expected["items"] = [bot1_dict]
        self.assertEqual(expected, actual)
Exemple #49
0
def schedule_request(request):
    """Creates and stores all the entities to schedule a new task request.

    The number of entities created is 3: TaskRequest, TaskResultSummary and
    TaskToRun.

    The TaskRequest is saved first as a DB transaction, then TaskResultSummary
    and TaskToRun are saved as a single DB RPC. The Search index is also
    updated in-between.

    NOTE(review): nothing in this function stores the TaskRequest itself;
    presumably the caller has already saved it -- confirm.

    When request.properties.idempotent is set and a TaskResultSummary with the
    same properties_hash exists that is younger than reusable_task_age_secs,
    its results are copied into the new TaskResultSummary and the new TaskToRun
    is stored with queue_number set to None.

    Arguments:
    - request: is in the TaskRequest entity saved in the DB.

    Returns:
      TaskResultSummary. TaskToRun is not returned.
    """
    dupe_future = None
    if request.properties.idempotent:
        # Find a previously run task that is also idempotent and completed. Start a
        # query to fetch items that can be used to dedupe the task. See the comment
        # for this property for more details.
        #
        # Do not use "cls.created_ts > oldest" here because this would require a
        # composite index. It's unnecessary because TaskRequest.key is mostly
        # equivalent to decreasing TaskRequest.created_ts, ordering by key works as
        # well and doesn't require a composite index.
        cls = task_result.TaskResultSummary
        h = request.properties.properties_hash
        # Started asynchronously; the result is only read further down, once the
        # new entities have been created and the search index put was started.
        dupe_future = cls.query(cls.properties_hash == h).order(cls.key).get_async()

    # At this point, the request is now in the DB but not yet in a mode where it
    # can be triggered or visible. Index it right away so it is searchable. If any
    # of remaining calls in this function fail, the TaskRequest and Search
    # Document will simply point to an incomplete task, which will be ignored.
    #
    # Creates the entities TaskToRun and TaskResultSummary but do not save them
    # yet. TaskRunResult will be created once a bot starts it.
    task = task_to_run.new_task_to_run(request)
    result_summary = task_result.new_result_summary(request)

    # Do not specify a doc_id, as they are guaranteed to be monotonically
    # increasing and searches are done in reverse order, which fits exactly the
    # created_ts ordering. This is useful because DateField is precise to the date
    # (!) and NumberField is signed 32 bits so the best it could do with EPOCH is
    # second resolution up to year 2038.
    index = search.Index(name="requests")
    packed = task_pack.pack_result_summary_key(result_summary.key)
    doc = search.Document(
        fields=[search.TextField(name="name", value=request.name), search.AtomField(name="id", value=packed)]
    )
    # Even if it fails here, we're still fine, as the task is not "alive" yet.
    search_future = index.put_async([doc])

    now = utils.utcnow()

    if dupe_future:
        # Reuse the results!
        dupe_summary = dupe_future.get_result()
        # Refuse tasks older than X days. This is due to the isolate server dropping
        # files. https://code.google.com/p/swarming/issues/detail?id=197
        oldest = now - datetime.timedelta(seconds=config.settings().reusable_task_age_secs)
        if dupe_summary and dupe_summary.created_ts > oldest:
            # If there's a bug, commenting out this block is sufficient to disable the
            # functionality.
            # Setting task.queue_number to None removes it from the scheduling.
            task.queue_number = None
            # The last argument lists the properties passed to _copy_entity alongside
            # the two entities; presumably these are skipped by the copy -- confirm.
            _copy_entity(dupe_summary, result_summary, ("created_ts", "name", "user", "tags"))
            result_summary.properties_hash = None
            result_summary.try_number = 0
            result_summary.cost_saved_usd = result_summary.cost_usd
            # Only zap after.
            result_summary.costs_usd = []
            result_summary.deduped_from = task_pack.pack_run_result_key(dupe_summary.run_result_key)

    # Get parent task details if applicable.
    parent_task_keys = None
    if request.parent_task_id:
        parent_run_key = task_pack.unpack_run_result_key(request.parent_task_id)
        parent_task_keys = [parent_run_key, task_pack.run_result_key_to_result_summary_key(parent_run_key)]

    result_summary.modified_ts = now

    # Storing these entities makes this task live. It is important at this point
    # that the HTTP handler returns as fast as possible, otherwise the task will
    # be run but the client will not know about it.
    def run():
        # Stores both new entities with a single put_multi call.
        ndb.put_multi([result_summary, task])

    def run_parent():
        # This one is slower.
        # Registers the new task id on both parent entities (run result and
        # result summary) and bumps their modification time.
        items = ndb.get_multi(parent_task_keys)
        k = result_summary.task_id
        for item in items:
            item.children_task_ids.append(k)
            item.modified_ts = now
        ndb.put_multi(items)

    # Raising will abort to the caller.
    futures = [datastore_utils.transaction_async(run)]
    if parent_task_keys:
        futures.append(datastore_utils.transaction_async(run_parent))

    try:
        search_future.get_result()
    except search.Error:
        # Do not abort the task, for now search is best effort.
        logging.exception("Put failed")

    for future in futures:
        # Check for failures, it would raise in this case, aborting the call.
        future.get_result()

    stats.add_task_entry(
        "task_enqueued", result_summary.key, dimensions=request.properties.dimensions, user=request.user
    )
    return result_summary
Exemple #50
0
 def is_enabled_callback():
   """Returns the enable_ts_monitoring value of the current settings."""
   settings = config.settings()
   return settings.enable_ts_monitoring
  def test_api_bots(self):
    """Exercises GET /swarming/api/v1/client/bots.

    Covers, in order: the default listing, the 'limit' parameter, paging with
    'limit' + 'cursor', and the 'quarantined' and 'is_dead' filters. The
    `expected` dict is mutated step by step, so the order of the sections
    below matters.
    """
    self.set_as_privileged_user()
    self.mock_now(datetime.datetime(2010, 1, 2, 3, 4, 5, 6))
    # Evaluated lazily so that it follows later mock_now() calls.
    now_str = lambda: unicode(utils.utcnow().strftime(utils.DATETIME_FORMAT))
    bot_management.bot_event(
        event_type='bot_connected', bot_id='id1', external_ip='8.8.4.4',
        dimensions={'foo': ['bar'], 'id': ['id1']}, state={'ram': 65},
        version='123456789', quarantined=False, task_id=None, task_name=None)
    bot1_dict = {
      u'dimensions': {u'foo': [u'bar'], u'id': [u'id1']},
      u'external_ip': u'8.8.4.4',
      u'first_seen_ts': now_str(),
      u'id': u'id1',
      u'is_dead': False,
      u'last_seen_ts': now_str(),
      u'quarantined': False,
      u'state': {u'ram': 65},
      u'task_id': None,
      u'task_name': None,
      u'version': u'123456789',
    }

    # Default listing: no limit, no filter.
    actual = self.app.get('/swarming/api/v1/client/bots', status=200).json
    expected = {
      u'items': [bot1_dict],
      u'cursor': None,
      u'death_timeout': config.settings().bot_death_timeout_secs,
      u'limit': 1000,
      u'now': now_str(),
    }
    self.assertEqual(expected, actual)

    # Test with limit.
    actual = self.app.get(
        '/swarming/api/v1/client/bots?limit=1', status=200).json
    expected['limit'] = 1
    self.assertEqual(expected, actual)

    # Advance time to make bot1 dead to test filtering for dead bots.
    self.mock_now(datetime.datetime(2011, 1, 2, 3, 4, 5, 6))
    # bot1_dict is the very object held in expected['items'], so this updates
    # the expectation as well.
    bot1_dict['is_dead'] = True
    expected['now'] = now_str()

    # Use quarantined bot to check filtering by 'quarantined' flag.
    bot_management.bot_event(
        event_type='bot_connected', bot_id='id2', external_ip='8.8.4.4',
        dimensions={'foo': ['bar'], 'id': ['id2']}, state={'ram': 65},
        version='123456789', quarantined=True, task_id=None, task_name=None)
    bot2_dict = {
      u'dimensions': {u'foo': [u'bar'], u'id': [u'id2']},
      u'external_ip': u'8.8.4.4',
      u'first_seen_ts': now_str(),
      u'id': u'id2',
      u'is_dead': False,
      u'last_seen_ts': now_str(),
      u'quarantined': True,
      u'state': {u'ram': 65},
      u'task_id': None,
      u'task_name': None,
      u'version': u'123456789',
    }

    # Test limit + cursor: start the query.
    actual = self.app.get(
        '/swarming/api/v1/client/bots?limit=1', status=200).json
    # The cursor value is opaque; copy it over and only check it is set.
    expected['cursor'] = actual['cursor']
    expected['items'] = [bot1_dict]
    self.assertTrue(actual['cursor'])
    self.assertEqual(expected, actual)

    # Test limit + cursor: continue the query.
    actual = self.app.get(
        '/swarming/api/v1/client/bots?limit=1&cursor=%s' % actual['cursor'],
        status=200).json
    expected['cursor'] = None
    expected['items'] = [bot2_dict]
    self.assertEqual(expected, actual)

    # Filtering by 'quarantined'.
    actual = self.app.get(
        '/swarming/api/v1/client/bots?filter=quarantined',
        status=200).json
    expected['limit'] = 1000
    expected['cursor'] = None
    expected['items'] = [bot2_dict]
    self.assertEqual(expected, actual)

    # Filtering by 'is_dead'.
    actual = self.app.get(
        '/swarming/api/v1/client/bots?filter=is_dead',
        status=200).json
    expected['limit'] = 1000
    expected['cursor'] = None
    expected['items'] = [bot1_dict]
    self.assertEqual(expected, actual)
Exemple #52
0
 def is_dead(self, now):
   """Tells whether the bot counts as dead at the given time.

   Args:
     now: timestamp to measure the bot's silence from.

   Returns:
     True when at least bot_death_timeout_secs seconds separate `now` from
     last_seen_ts.
   """
   death_timeout_secs = config.settings().bot_death_timeout_secs
   silence = now - self.last_seen_ts
   return silence.total_seconds() >= death_timeout_secs