Example #1
0
  def main():
    """Walks directories in pre-order and indexes the markdown blobs found.

    Resumes after ref.last_added_dir when it is set, otherwise starts at '/'.
    The current directory is written back to ref.last_added_dir whenever at
    least _update_frequency has elapsed since the last successful save.
    """
    # Restore state: continue after the last checkpointed directory, if any.
    if not ref.last_added_dir:
      directory = '/'
    else:
      directory = get_next(ref.last_added_dir)
      logging.info('Starting from %s', directory)

    checkpoint_time = utils.utcnow()

    # Pre-order tree traversal. The loop stops once get_next() returns a falsy
    # value.
    while directory:
      # Locations of every '*.md' blob (case-insensitive) in this directory.
      markdown_locs = [
          root._replace(path=posixpath.join(directory, entry.name))
          for entry in entries[directory]
          if entry.type == 'blob' and entry.name.lower().endswith('.md')
      ]

      if markdown_locs:
        loaded_docs = _load_docs_async(markdown_locs, rev).get_result()
        INDEX.put(loaded_docs)

      if utils.utcnow() - checkpoint_time >= _update_frequency:
        try:
          ref.last_added_dir = directory
          ref.put()
          checkpoint_time = utils.utcnow()
          logging.info('Processed %s', directory)
        except db.Error:  # pragma: no coverage
          # Best effort. Failing to persist the last added dir is fine, it can
          # probably be saved on a later iteration.
          # Anyway, we have a 3 hrs timeout before alerts start to fire.
          logging.warning(
              'Could not save Ref.last_added_dir: %s', traceback.format_exc())

      directory = get_next(directory)
Example #2
0
  def _complete(
        self, build_id, lease_key, result, result_details, failure_reason=None,
        url=None):
    """Marks a build as completed. Used by succeed and fail methods.

    Args:
      build_id: id of the build to complete.
      lease_key: lease key, checked against the build's current lease.
      result: model.BuildResult.SUCCESS or model.BuildResult.FAILURE.
      result_details: value stored in build.result_details.
      failure_reason: value stored in build.failure_reason.
      url: if not None, replaces build.url.

    Returns:
      The build. If it was already completed with exactly the same result,
      failure reason, result details and url, it is returned unmodified.

    Raises:
      errors.BuildIsCompletedError: the build is already completed with
        different values.
    """
    validate_lease_key(lease_key)
    validate_url(url)
    assert result in (model.BuildResult.SUCCESS, model.BuildResult.FAILURE)
    build = self._get_leasable_build(build_id)

    if build.status == model.BuildStatus.COMPLETED:
      # Repeating a completion with identical values is a no-op.
      same_outcome = (
          build.result == result and
          build.failure_reason == failure_reason and
          build.result_details == result_details and
          build.url == url)
      if not same_outcome:
        raise errors.BuildIsCompletedError(
            'Build %s has already completed' % build_id)
      return build

    self._check_lease(build, lease_key)

    # Record the completion on the entity.
    build.status = model.BuildStatus.COMPLETED
    build.status_changed_time = utils.utcnow()
    build.complete_time = utils.utcnow()
    build.result = result
    build.result_details = result_details
    build.failure_reason = failure_reason
    if url is not None:  # pragma: no branch
      build.url = url
    self._clear_lease(build)
    build.put()

    logging.info(
        'Build %s was completed. Status: %s. Result: %s',
        build.key.id(), build.status, build.result)
    self._enqueue_callback_task_if_needed(build)
    return build
Example #3
0
 def touch_all():
   """Touches every kind of auth entity once.

   Calls make_group and make_ip_whitelist, then stores an
   AuthIPWhitelistAssignments entity and an AuthGlobalConfig entity, recording
   a revision on each of the latter two before the put.
   """
   group_members = [ident('*****@*****.**'), ident('*****@*****.**')]
   make_group(
       name='A group',
       members=group_members,
       description='Blah',
       comment='New group')

   make_ip_whitelist(
       name='An IP whitelist',
       subnets=['127.0.0.1/32'],
       description='Bluh',
       comment='New IP whitelist')

   # IP whitelist assignments.
   single_assignment = model.AuthIPWhitelistAssignments.Assignment(
       identity=ident('*****@*****.**'),
       ip_whitelist='An IP whitelist')
   assignments = model.AuthIPWhitelistAssignments(
       key=model.ip_whitelist_assignments_key(),
       assignments=[single_assignment])
   assignments.record_revision(
       modified_by=ident('*****@*****.**'),
       modified_ts=utils.utcnow(),
       comment='New assignment')
   assignments.put()

   # Global config.
   global_config = model.AuthGlobalConfig(
       key=model.root_key(),
       oauth_client_id='client_id',
       oauth_client_secret='client_secret',
       oauth_additional_client_ids=['1', '2'])
   global_config.record_revision(
       modified_by=ident('*****@*****.**'),
       modified_ts=utils.utcnow(),
       comment='Config change')
   global_config.put()
Example #4
0
  def txn():
    """Completes the build, or returns it as is if already completed this way.

    Returns:
      The build entity.

    Raises:
      errors.BuildIsCompletedError: the build is already completed with a
        different result, failure reason, result details or url.
    """
    build = _get_leasable_build(build_id)

    if build.status == model.BuildStatus.COMPLETED:
      # Repeating a completion with identical values is a no-op.
      same_outcome = (
          build.result == result and
          build.failure_reason == failure_reason and
          build.result_details == result_details and
          build.url == url)
      if not same_outcome:
        raise errors.BuildIsCompletedError(
          'Build %s has already completed' % build_id)
      return build

    _check_lease(build, lease_key)

    # Record the completion on the entity.
    build.status = model.BuildStatus.COMPLETED
    build.status_changed_time = utils.utcnow()
    build.complete_time = utils.utcnow()
    build.result = result
    build.result_details = result_details
    build.failure_reason = failure_reason
    if url is not None:  # pragma: no branch
      build.url = url
    build.clear_lease()
    build.put()

    notifications.enqueue_callback_task_if_needed(build)
    return build
Example #5
0
  def test_set_from_run_result_two_tries(self):
    """Exercises need_update/set_from_run_result with run results of 2 tries."""
    def commit(*entities):
      # Stores all the entities in a single transaction.
      ndb.transaction(lambda: ndb.put_multi(entities))

    request = task_request.make_request(_gen_request(), True)
    summary = task_result.new_result_summary(request)
    first_try = task_result.new_run_result(request, 1, 'localhost', 'abc', {})
    second_try = task_result.new_run_result(request, 2, 'localhost', 'abc', {})

    self.assertTrue(summary.need_update_from_run_result(first_try))
    second_try.modified_ts = utils.utcnow()
    summary.modified_ts = utils.utcnow()
    commit(summary, second_try)

    # Apply the first try.
    self.assertTrue(summary.need_update_from_run_result(first_try))
    first_try.modified_ts = utils.utcnow()
    summary.set_from_run_result(first_try, request)
    commit(summary, first_try)

    summary = summary.key.get()
    self.assertFalse(summary.need_update_from_run_result(first_try))

    # Apply the second try.
    self.assertTrue(summary.need_update_from_run_result(second_try))
    second_try.modified_ts = utils.utcnow()
    summary.set_from_run_result(second_try, request)
    commit(summary, second_try)
    summary = summary.key.get()

    self.assertEqual(2, summary.try_number)
    self.assertFalse(summary.need_update_from_run_result(first_try))
Example #6
0
 def test_add_with_leasing(self):
   """A build added with a future lease expiration date comes back leased."""
   lease_expiration = utils.utcnow() + datetime.timedelta(seconds=10)
   leased_build = self.service.add(
       bucket='chromium',
       lease_expiration_date=lease_expiration,
   )
   self.assertTrue(leased_build.is_leased)
   self.assertGreater(leased_build.lease_expiration_date, utils.utcnow())
   self.assertIsNotNone(leased_build.lease_key)
Example #7
0
 def test_cancel(self):
   """Canceling a stored build completes it with the CANCELED result."""
   self.test_build.put()
   build_id = self.test_build.key.id()
   canceled = service.cancel(build_id)

   # Status and timestamps.
   self.assertEqual(canceled.status, model.BuildStatus.COMPLETED)
   self.assertEqual(canceled.status_changed_time, utils.utcnow())
   self.assertEqual(canceled.complete_time, utils.utcnow())

   # Result and reason.
   self.assertEqual(canceled.result, model.BuildResult.CANCELED)
   self.assertEqual(
     canceled.cancelation_reason, model.CancelationReason.CANCELED_EXPLICITLY)
Example #8
0
 def setUp(self):
     """Stores a result summary plus one run result; keeps it in self.run_result."""
     super(TestOutput, self).setUp()
     request = task_request.make_request(_gen_request(), True)

     summary = task_result.new_result_summary(request)
     summary.modified_ts = utils.utcnow()
     ndb.transaction(summary.put)

     pending_run = task_result.new_run_result(request, 1, "localhost", "abc", {})
     pending_run.modified_ts = utils.utcnow()
     summary.set_from_run_result(pending_run, request)
     ndb.transaction(lambda: ndb.put_multi((summary, pending_run)))

     # Keep the copy that is read back through the key.
     self.run_result = pending_run.key.get()
Example #9
0
    def test_run_result_duration(self):
        """Checks duration and duration_now() for completed and abandoned runs."""
        started = datetime.datetime(2010, 1, 1, 0, 0, 0)
        two_minutes = datetime.timedelta(seconds=120)

        # Completed two minutes after it started.
        completed_run = task_result.TaskRunResult(
            started_ts=started,
            completed_ts=datetime.datetime(2010, 1, 1, 0, 2, 0),
        )
        self.assertEqual(two_minutes, completed_run.duration)
        self.assertEqual(two_minutes, completed_run.duration_now(utils.utcnow()))

        # Abandoned: no duration is reported.
        abandoned_run = task_result.TaskRunResult(
            started_ts=started,
            abandoned_ts=datetime.datetime(2010, 1, 1, 0, 1, 0),
        )
        self.assertEqual(None, abandoned_run.duration)
        self.assertEqual(None, abandoned_run.duration_now(utils.utcnow()))
Example #10
0
 def test_ip_whitelists_serialization(self):
   """Serializing snapshot with non-trivial IP whitelist."""
   bots_whitelist = model.AuthIPWhitelist(
       key=model.ip_whitelist_key('bots'),
       subnets=['127.0.0.1/32'],
       description='Blah blah blah',
       created_ts=utils.utcnow(),
       created_by=model.Identity.from_bytes('user:[email protected]'),
       modified_ts=utils.utcnow(),
       modified_by=model.Identity.from_bytes('user:[email protected]'),
   )
   self.assert_serialization_works(
       make_snapshot_obj(ip_whitelists=[bots_whitelist]))
Example #11
0
  def process_next_chunk(self, up_to):
    """Processes as many minutes as possible, starting at the next pending one.

    This should be called from a non-synchronized cron job, so it will rarely
    have more than one instance running at a time. Every entity is self
    contained so it explicitly handles datastore inconsistency.

    Arguments:
    - up_to: number of minutes to buffer between 'now' and the last minute to
             process. Will usually be in the range of 1 to 10.

    Returns:
      Number of minutes processed successfully, capped at
      self._max_minutes_per_process. If a transaction, logservice or deadline
      error interrupts the run, returns the number processed so far, or None
      when not even a single minute was processed.
    """
    processed = 0
    first_minute = None
    try:
      now = utils.utcnow()
      first_minute = self._get_next_minute_to_process(now)
      minute = first_minute
      while now - minute >= datetime.timedelta(minutes=up_to):
        self._process_one_minute(minute)
        processed += 1
        self._set_last_processed_time(minute)
        if processed == self._max_minutes_per_process:
          break
        minute = minute + datetime.timedelta(minutes=1)
        # Refresh the clock, processing a minute may have taken a while.
        now = utils.utcnow()
      return processed
    except (
        datastore_errors.TransactionFailedError,
        logservice.Error,
        DeadlineExceededError) as e:
      msg = (
          'Got an error while processing stats.\n'
          'Processing started at %s; tried to get up to %smins from now; '
          'Processed %dmins\n%s') % (
          first_minute, up_to, processed, e)
      if processed:
        # At least something was processed, so it's fine.
        logging.warning(msg)
        return processed
      # This is bad, it means that for the lifespan of the cron handler
      # (currently 10 minutes), it was unable to even process a single minute
      # worth of statistics.
      logging.error(msg)
      return None
Example #12
0
  def test_set_from_run_result(self):
    """The summary stops needing an update once the run result is applied."""
    request = task_request.make_request(_gen_request(), True)
    summary = task_result.new_result_summary(request)
    run = task_result.new_run_result(request, 1, 'localhost', 'abc', {})

    self.assertTrue(summary.need_update_from_run_result(run))
    summary.modified_ts = utils.utcnow()
    run.modified_ts = utils.utcnow()
    ndb.transaction(lambda: ndb.put_multi((summary, run)))

    # Storing both entities alone does not bring the summary up to date.
    self.assertTrue(summary.need_update_from_run_result(run))
    summary.set_from_run_result(run, request)
    ndb.transaction(lambda: ndb.put_multi([summary]))

    self.assertFalse(summary.need_update_from_run_result(run))
Example #13
0
 def modify(name, **kwargs):
   """Creates or updates the IP whitelist 'name', then replicates the auth DB.

   Args:
     name: IP whitelist name, used to build the entity key.
     **kwargs: property values passed to the entity's populate().
   """
   key = model.ip_whitelist_key(name)
   # Reuse the stored entity when there is one, otherwise start a new one.
   whitelist = key.get() or model.AuthIPWhitelist(
       key=key,
       created_by=model.Identity.from_bytes('user:[email protected]'),
       created_ts=utils.utcnow())
   whitelist.record_revision(
       modified_by=model.Identity.from_bytes('user:[email protected]'),
       modified_ts=utils.utcnow(),
       comment='Comment')
   whitelist.populate(**kwargs)
   whitelist.put()
   model.replicate_auth_db()
Example #14
0
 def test_run_result_timeout(self):
   """A TIMED_OUT run result marks both itself and the summary as failed."""
   request = task_request.make_request(_gen_request(), True)
   summary = task_result.new_result_summary(request)
   summary.modified_ts = utils.utcnow()
   ndb.transaction(summary.put)

   timed_out_run = task_result.new_run_result(
       request, 1, 'localhost', 'abc', {})
   timed_out_run.state = task_result.State.TIMED_OUT
   timed_out_run.completed_ts = utils.utcnow()
   timed_out_run.modified_ts = utils.utcnow()
   summary.set_from_run_result(timed_out_run, request)
   ndb.transaction(lambda: ndb.put_multi((timed_out_run, summary)))

   # Check the entities as read back through their keys.
   timed_out_run = timed_out_run.key.get()
   summary = summary.key.get()
   self.assertEqual(True, timed_out_run.failure)
   self.assertEqual(True, summary.failure)
Example #15
0
def _new_request_key():
  """Returns a valid ndb.Key for this entity.

  Task id is a 64 bits integer represented as a string to the user. From the
  highest order bits to the lowest:
  - 1 bit set to 0 to keep the value positive.
  - 43 bits of time since _BEGINING_OF_THE_WORLD at 1ms resolution.
    It is good for 2**43 / 365.3 / 24 / 60 / 60 / 1000 = 278 years or 2010+278 =
    2288. The author will be dead at that time.
  - 16 bits set to a random value or a server instance specific value. Assuming
    an instance is internally consistent with itself, it can ensure to not reuse
    the same 16 bits in two consecutive requests and/or throttle itself to one
    request per millisecond.
    Using a random value reduces to 2**-15 the probability of collision on the
    exact same timestamp at 1ms resolution, so a maximum theoretical rate of
    65536000 requests/sec but an effective rate in the range of ~64k
    requests/sec without much transaction conflicts. We should be fine.
  - 4 bits set to 0x1. This is the 'version' of the entity schema. Previous
    version had 0. Note that this value is XOR'ed in the DB so it's stored as
    0xE. When the TaskRequest entity tree is modified in a breaking way that
    affects the packing and unpacking of task ids, this value should be bumped.

  The key id is this value XORed with task_pack.TASK_REQUEST_KEY_ID_MASK. The
  reason is that increasing key id values are in decreasing timestamp order.
  """
  time_bits = datetime_to_request_base_id(utils.utcnow())
  # TODO(maruel): Use real randomness.
  random_bits = random.getrandbits(16)
  # Layout: timestamp base | 16 random bits | 4 bits schema version (0x1).
  request_id = int(time_bits | (random_bits << 4) | 0x1)
  return request_id_to_key(request_id)
Example #16
0
def _get_pending_auth_db_transaction():
    """Used internally to keep track of changes done in the transaction.

    Must be called inside an ndb transaction. The first call within a
    transaction loads (or creates) AuthReplicationState, bumps its auth_db_rev
    by one and caches the wrapper on the transaction context; later calls in
    the same transaction return the cached object.

    Returns:
      Instance of _AuthDBTransaction (stored in the transaction context).

    Raises:
      ValueError: when called on a Replica, which can't modify Auth DB.
    """
    # The transaction context holds the object. Each transaction retry gets its
    # own new transaction context which is what we need, see ndb/context.py,
    # 'transaction' tasklet, around line 982 (for SDK 1.9.6).
    assert ndb.in_transaction()
    context = ndb.get_context()
    pending = getattr(context, "_auth_db_transaction", None)
    if pending:
        return pending

    # Prepare next AuthReplicationState (auth_db_rev +1).
    state = replication_state_key().get()
    if not state:
        if is_primary():
            primary_id = app_identity.get_application_id()
        else:
            primary_id = None
        state = AuthReplicationState(
            key=replication_state_key(),
            primary_id=primary_id,
            auth_db_rev=0,
        )
    # Assert Primary or Standalone. Replicas can't increment auth db revision.
    if not is_primary() and state.primary_id:
        raise ValueError("Can't modify Auth DB on Replica")
    state.auth_db_rev += 1
    state.modified_ts = utils.utcnow()

    # Remember the state in the transaction context. Used in
    # replicate_auth_db(...) later.
    pending = _AuthDBTransaction(state)
    context._auth_db_transaction = pending
    return pending
def _clean_up_expired_leases(machine_type):
  """Cleans up expired leases.

  Prunes expired leases from machine_type.leases,
  but does not write the result to the datastore.

  Args:
    machine_type: MachineType instance.

  Returns:
    A list of leases that were removed.
  """
  active = []
  expired = []

  for request in machine_type.leases:
    if request.hostname and request.lease_expiration_ts <= utils.utcnow():
      logging.warning(
          'Request ID %s expired:\nHostname: %s\nExpiration: %s',
          request.client_request_id,
          request.hostname,
          request.lease_expiration_ts,
      )
      expired.append(request.hostname)
    else:
      active.append(request)

  machine_type.leases = active
  machine_type.pending_deletion.extend(expired)
  return expired
Example #18
0
 def create(entity):
   """Records a revision on the entity and creates it via self.do_create.

   Returns:
     (True, None) on success, or (False, error dict) where the dict carries
     'http_code' and 'text' (plus 'details' for EntityOperationError).
   """
   already_exists = entity.key.get()
   if already_exists:
     return False, {
       'http_code': 409,
       'text': 'Such %s already exists' % self.entity_kind_title,
     }

   entity.record_revision(
       modified_by=api.get_current_identity(),
       modified_ts=utils.utcnow(),
       comment='REST API')

   try:
     self.do_create(entity)
   except EntityOperationError as err:
     return False, {
       'http_code': 409,
       'text': err.message,
       'details': err.details,
     }
   except ValueError as err:
     return False, {
       'http_code': 400,
       'text': str(err),
     }
   else:
     model.replicate_auth_db()
     return True, None
Example #19
0
 def modify(name, commit=True, **kwargs):
   """Creates or updates the group 'name' and optionally replicates the auth DB.

   Args:
     name: group name, used to build the entity key.
     commit: when truthy, model.replicate_auth_db() is called after the put.
     **kwargs: property values passed to the entity's populate().
   """
   key = model.group_key(name)
   # Reuse the stored entity when there is one, otherwise start a new one.
   group = key.get() or model.AuthGroup(
       key=key,
       created_by=ident_a,
       created_ts=utils.utcnow())
   group.record_revision(
       modified_by=ident_a,
       modified_ts=utils.utcnow(),
       comment='Comment')
   group.populate(**kwargs)
   group.put()
   if not commit:
     return
   model.replicate_auth_db()
Example #20
0
 def store(self, updated_by=None):
   """Stores a new version of the config entity.

   Args:
     updated_by: value for self.updated_by; when falsy,
       auth.get_current_identity() is used instead.

   Returns:
     Whatever datastore_utils.store_new_version returns.
   """
   # The key is left incomplete, to be completed by 'store_new_version'.
   root_key = self._get_root_key()
   self.key = ndb.Key(self.__class__, None, parent=root_key)
   self.updated_by = updated_by or auth.get_current_identity()
   self.updated_ts = utils.utcnow()
   root_model = self._get_root_model()
   return datastore_utils.store_new_version(self, root_model)
Example #21
0
def _get_days_keys(handler, now, num_days):
  """Returns a list of ndb.Key to Snapshot instances."""
  today = (now or utils.utcnow()).date()
  return [
    handler.day_key(today - datetime.timedelta(days=i))
    for i in xrange(num_days)
  ]
Example #22
0
    def test_yield_run_result_keys_with_dead_bot(self):
        """The run result key is yielded only past BOT_PING_TOLERANCE."""
        request = task_request.make_request(_gen_request(), True)
        summary = task_result.new_result_summary(request)
        summary.modified_ts = utils.utcnow()
        ndb.transaction(summary.put)

        run = task_result.new_run_result(request, 1, "localhost", "abc", {})
        run.completed_ts = utils.utcnow()
        run.modified_ts = utils.utcnow()
        summary.set_from_run_result(run, request)
        ndb.transaction(lambda: ndb.put_multi((run, summary)))

        # Exactly at the tolerance: nothing is reported yet.
        tolerance_deadline = self.now + task_result.BOT_PING_TOLERANCE
        self.mock_now(tolerance_deadline)
        dead_keys = list(task_result.yield_run_result_keys_with_dead_bot())
        self.assertEqual([], dead_keys)

        # Past the tolerance: the run result key is reported.
        self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
        dead_keys = list(task_result.yield_run_result_keys_with_dead_bot())
        self.assertEqual([run.key], dead_keys)
Example #23
0
def _get_minutes_keys(handler, now, num_minutes):
  """Returns a list of ndb.Key to Snapshot instances."""
  now = now or utils.utcnow()
  return [
    handler.minute_key(now - datetime.timedelta(minutes=i))
    for i in xrange(num_minutes)
  ]
Example #24
0
def cancel_task(result_summary_key):
    """Cancels a task if possible.

    Args:
      result_summary_key: key of the result summary of the task to cancel.

    Returns:
      Tuple (ok, was_running): 'ok' is True when the task was canceled,
      'was_running' is True when the task was in RUNNING state at that time.
      When the transaction fails with datastore_utils.CommitError, an error
      message string is returned instead.
      NOTE(review): the string return does not match the tuple shape; confirm
      callers handle it.
    """
    request_key = task_pack.result_summary_key_to_request_key(result_summary_key)
    request = request_key.get()
    to_run_key = task_to_run.request_to_task_to_run_key(request)
    now = utils.utcnow()

    def run():
        to_run, summary = ndb.get_multi((to_run_key, result_summary_key))
        was_running = summary.state == task_result.State.RUNNING
        if summary.can_be_canceled:
            to_run.queue_number = None
            summary.state = task_result.State.CANCELED
            summary.abandoned_ts = now
            summary.modified_ts = now

            # Start the puts, send the notification, then wait on the puts.
            put_futures = ndb.put_multi_async((to_run, summary))
            _maybe_pubsub_notify_via_tq(summary, request)
            for future in put_futures:
                future.check_success()

            return True, was_running
        return False, was_running

    try:
        ok, was_running = datastore_utils.transaction(run)
    except datastore_utils.CommitError as e:
        packed = task_pack.pack_result_summary_key(result_summary_key)
        return "Failed killing task %s: %s" % (packed, e)

    # Add it to the negative cache.
    task_to_run.set_lookup_cache(to_run_key, False)
    # TODO(maruel): Add stats.
    return ok, was_running
Example #25
0
def send_build_latency(buf, metric, bucket, must_be_never_leased):
  """Reports the latency of SCHEDULED builds of a bucket.

  Latency is the number of seconds between a build's create_time and now. The
  average is sent through set_gauge (0.0 when no build matches) and the
  distribution is set on DISTRIBUTION_OF_CLOUD_METRIC[metric].

  Args:
    buf: passed through to set_gauge.
    metric: metric passed to set_gauge and used as the key into
      DISTRIBUTION_OF_CLOUD_METRIC.
    bucket: bucket whose builds are queried.
    must_be_never_leased: if truthy, only builds with never_leased == True are
      considered.
  """
  query = model.Build.query(
    model.Build.bucket == bucket,
    model.Build.status == model.BuildStatus.SCHEDULED,
  )
  if not must_be_never_leased:
    # Reuse the index that has never_leased
    query = query.filter(model.Build.never_leased.IN((True, False)))
  else:
    query = query.filter(model.Build.never_leased == True)

  now = utils.utcnow()
  latency_sum = 0.0
  build_count = 0
  distribution = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer())
  for build in query.iter(projection=[model.Build.create_time]):
    seconds = (now - build.create_time).total_seconds()
    distribution.add(seconds)
    latency_sum += seconds
    build_count += 1

  # With no builds the sum is still 0.0 and is reported as is.
  average = latency_sum / build_count if build_count > 0 else latency_sum
  set_gauge(buf, bucket, metric, average)
  DISTRIBUTION_OF_CLOUD_METRIC[metric].set(
      distribution, {FIELD_BUCKET: bucket}, target_fields=GLOBAL_TARGET_FIELDS)
Example #26
0
def make_session(userinfo, expiration_sec):
    """Creates new AuthOpenIDSession (and AuthOpenIDUser if needed) entities.

    Args:
      userinfo: user profile dict as returned by handle_authorization_code.
        The keys "sub", "email", "name" and "picture" are read.
      expiration_sec: how long (in seconds) the session is allowed to live.

    Returns:
      AuthOpenIDSession already persisted in the datastore.
    """
    now = utils.utcnow()
    expires_at = now + datetime.timedelta(seconds=expiration_sec)

    # Refresh datastore entry for logged in user.
    user_id = userinfo["sub"].encode("ascii")
    user = AuthOpenIDUser(
        id=user_id,
        last_session_ts=now,
        email=userinfo["email"],
        name=userinfo["name"],
        picture=userinfo["picture"],
    )

    # The new session expires at the same time the cookie signature expires.
    # ID is autogenerated by the datastore.
    session = AuthOpenIDSession(
        parent=user.key,
        created_ts=now,
        expiration_ts=expires_at,
        email=user.email,
        name=user.name,
        picture=user.picture,
    )

    # Store both entities in one transaction.
    ndb.transaction(lambda: ndb.put_multi([user, session]))
    assert session.key.integer_id()
    return session
Example #27
0
def release_lease(lease_key):
  """Releases a lease on a machine.

  Does nothing but log when the lease request is missing or is not flagged as
  released. Otherwise clears the 'released' flag and, when the leased machine
  exists, expires the lease as of now on both the lease request and the
  machine.

  Args:
    lease_key: ndb.Key for a models.LeaseRequest entity.
  """
  lease = lease_key.get()
  if not lease:
    logging.warning('LeaseRequest not found: %s', lease_key)
    return
  if not lease.released:
    logging.warning('LeaseRequest not released:\n%s', lease)
    return

  lease.released = False

  machine_id = lease.machine_id
  if not machine_id:
    logging.warning('LeaseRequest has no associated machine:\n%s', lease)
    lease.put()
    return

  machine_key = ndb.Key(models.CatalogMachineEntry, lease.machine_id)
  machine = machine_key.get()
  if not machine:
    logging.error('LeaseRequest has non-existent machine leased:\n%s', lease)
    lease.put()
    return

  # Just expire the lease now and let MachineReclamationProcessor handle it.
  logging.info('Expiring LeaseRequest:\n%s', lease)
  now = utils.utcnow()
  timestamp = utils.datetime_to_timestamp(now)
  lease.response.lease_expiration_ts = timestamp / 1000 / 1000
  machine.lease_expiration_ts = now
  ndb.put_multi([lease, machine])
0
def bootstrap_ip_whitelist(name, subnets, description=""):
    """Adds subnets to an IP whitelist if not there yet.

    Can be used on local dev appserver to add 127.0.0.1 to IP whitelist during
    startup. Should not be used from request handlers.

    Args:
      name: IP whitelist name to add a subnet to.
      subnets: IP subnets to add (as a list or tuple of strings).
      description: description of IP whitelist (if new entity is created).

    Returns:
      True if entry was added, False if it is already there or subnet is invalid.
    """
    assert isinstance(subnets, (list, tuple))
    try:
        normalized = [ipaddr.normalize_subnet(subnet) for subnet in subnets]
    except ValueError:
        return False
    subnets = normalized

    key = ip_whitelist_key(name)
    whitelist = key.get()
    # Nothing to do when the entity already lists every requested subnet.
    if whitelist and not any(s not in whitelist.subnets for s in subnets):
        return False

    now = utils.utcnow()
    if not whitelist:
        whitelist = AuthIPWhitelist(
            key=key,
            description=description,
            created_ts=now,
            created_by=get_service_self_identity(),
        )
    for subnet in subnets:
        if subnet in whitelist.subnets:
            continue
        whitelist.subnets.append(subnet)
    whitelist.record_revision(
        modified_by=get_service_self_identity(),
        modified_ts=now,
        comment="Bootstrap",
    )
    whitelist.put()
    replicate_auth_db()
    return True
Example #29
0
  def txn():
    """Resets a build whose lease has expired back to SCHEDULED.

    Generator-style body: yields ndb futures and returns through ndb.Return.

    Returns:
      The updated build, or None when the build does not exist, has no lease
      expiration date, or its lease has not expired yet.
    """
    build = yield model.Build.get_by_id_async(build_id)
    if not build or build.lease_expiration_date is None:  # pragma: no cover
      return
    if build.lease_expiration_date > utils.utcnow():  # pragma: no cover
      # The lease is still valid.
      return

    assert build.status != model.BuildStatus.COMPLETED, (
      'Completed build is leased')

    # Drop the lease and put the build back in the queue.
    build.clear_lease()
    build.status = model.BuildStatus.SCHEDULED
    build.status_changed_time = utils.utcnow()
    build.url = None
    yield build.put_async()
    raise ndb.Return(build)
Example #30
0
def get_open_session(cookie):
    """Returns AuthOpenIDSession if it exists and is still open.

    Args:
      cookie: value of 'oid_session' cookie.

    Returns:
      AuthOpenIDSession if cookie is valid and session has not expired yet,
      None otherwise (empty or invalid cookie, unknown, closed or expired
      session).
    """
    if not cookie:
        return None

    try:
        payload = SessionCookie.validate(cookie)
    except tokens.InvalidTokenError as e:
        logging.warning("Bad session cookie: %s", e)
        return None

    # 'ss' is unpacked as a little-endian signed 64 bit integer.
    try:
        (session_id,) = struct.unpack("<q", payload["ss"])
    except struct.error as exc:
        logging.warning("Bad session cookie, bad 'ss' field %r: %s", payload["ss"], exc)
        return None

    # Relying on ndb in-process cache here to avoid refetches from datastore.
    session_key = ndb.Key(AuthOpenIDUser, payload["sub"], AuthOpenIDSession, session_id)
    session = session_key.get()
    if not session:
        logging.warning("Requesting non-existing session: %r", payload)
        return None

    # Only a session that is neither closed nor expired is returned.
    still_open = session.closed_ts is None and utils.utcnow() <= session.expiration_ts
    return session if still_open else None
Example #31
0
def rebuild_task_cache(payload):
  """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss, query all the bots and check for the ones which can run
  the task.

  Warning: There's a race condition, where the TaskDimensions query could be
  missing some instances due to eventually coherent consistency in the BotInfo
  query. This only happens when there's new request dimensions set AND a bot
  that can run this task recently showed up.

  Runtime expectation: the scale on the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  bots that can run the task, this can take a long time to store all the
  entities on a new kind of request. As such, it must be called in the backend.

  Arguments:
  - payload: JSON encoded dict as created in assert_task() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _EXTEND_VALIDITY for how long this cache
      is valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
  data = json.loads(payload)
  logging.debug('rebuild_task_cache(%s)', data)
  dimensions = data[u'dimensions']
  dimensions_hash = int(data[u'dimensions_hash'])
  valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
  # Flatten {key: [values]} into a sorted list of u'key:value' strings.
  dimensions_flat = sorted(
      u'%s:%s' % (k, v)
      for k, values in dimensions.iteritems() for v in values)

  now = utils.utcnow()
  # Number of BotTaskDimensions entities that were created/updated in the DB.
  updated = 0
  # Number of BotTaskDimensions entities that matched this task queue.
  viable = 0
  try:
    pending = []
    for bot_task_key in _yield_BotTaskDimensions_keys(
        dimensions_hash, dimensions_flat):
      viable += 1
      future = _refresh_BotTaskDimensions(
          bot_task_key, dimensions_flat, now, valid_until_ts)
      pending.append(future)
      done, pending = _cap_futures(pending)
      updated += sum(1 for i in done if i)
    updated += sum(1 for i in _flush_futures(pending) if i)
    # The main reason for this log entry is to confirm the timing of the first
    # part (updating BotTaskDimensions) versus the second part (updating
    # TaskDimensions).
    logging.debug('Updated %d BotTaskDimensions', updated)

    # Done updating, now store the entity. Must use a transaction as there could
    # be other dimensions set in the entity.
    task_dims_key = _get_task_dims_key(dimensions_hash, dimensions)

    # First do a dry run. If the dry run passes, skip the transaction.
    #
    # The rationale is that there can be concurrent triggers of this taskqueue
    # (rebuild-cache) when there are concurrent task creations. The dry run
    # costs not much overhead and if it passes, it saves transaction contention.
    #
    # The transaction contention can be problematic on pool with a high
    # cardinality of the dimension sets.
    obj = task_dims_key.get()
    if not obj or obj.assert_request(now, valid_until_ts, dimensions_flat):
      def _run():
        """Creates, updates or deletes the TaskDimensions entity.

        Returns:
          'created', 'updated' or 'deleted' depending on what was done, None
          if the entity was left untouched.
        """
        action = None
        obj = task_dims_key.get()
        if not obj:
          obj = TaskDimensions(key=task_dims_key)
          action = 'created'
        if obj.assert_request(now, valid_until_ts, dimensions_flat):
          # Keep 'created' for a brand new entity; a pre-existing entity that
          # needed changes is reported as 'updated'.
          if not action:
            action = 'updated'
          if not obj.sets:
            obj.key.delete()
            return 'deleted'
          obj.put()
        return action

      # Do an adhoc transaction instead of using datastore_utils.transaction().
      # This is because for some pools, the transaction rate may be so high that
      # it's impossible to get a good performance on the entity group.
      #
      # In practice the odds of conflict is ~nil, because it can only conflict
      # if a TaskDimensions.set has more than one item and this happens when
      # there's a hash conflict (odds 2^31) plus two concurrent task running
      # simultaneously (over _EXTEND_VALIDITY period) so we can do it in a more
      # adhoc way.
      key = '%s:%s' % (
          task_dims_key.parent().string_id(), task_dims_key.string_id())
      if not memcache.add(key, True, time=60, namespace='task_queues_tx'):
        # add() returns True if the entry was added, False otherwise. That's
        # perfect.
        logging.warning('Failed taking pseudo-lock for %s; reenqueuing', key)
        return False
      try:
        action = _run()
      finally:
        # Always release the pseudo-lock, even if _run() raised.
        memcache.delete(key, namespace='task_queues_tx')

      # Keeping this dead code for now, in case we find a solution for the
      # transaction rate issue.
      #try:
      #  action = datastore_utils.transaction(_run, retries=4)
      #except datastore_utils.CommitError as e:
      #  # Still log an error but no need for a stack trace in the logs. It is
      #  # important to surface that the call failed so the task queue is
      #  # retried later.
      #  logging.warning('Failed updating TaskDimensions: %s; reenqueuing', e)
      #  return False

      if action:
        # Only log at info level when something was done. This helps scanning
        # quickly the logs.
        logging.info('Did %s', action)
      else:
        logging.debug('Did nothing')
    else:
      logging.debug('Skipped transaction!')
  finally:
    # Any of the _refresh_BotTaskDimensions() calls above could throw. Still log
    # how far we went.
    msg = (
      'rebuild_task_cache(%d) in %.3fs. viable bots: %d; bots updated: %d\n%s')
    dims = '\n'.join('  ' + d for d in dimensions_flat)
    duration = (utils.utcnow()-now).total_seconds()
    # Only log at info level when something was done. This helps scanning
    # quickly the logs.
    if updated:
      logging.info(msg, dimensions_hash, duration, viable, updated, dims)
    else:
      logging.debug(msg, dimensions_hash, duration, viable, updated, dims)
  return True
Example #32
0
 def post(self):
     """Deletes the ContentEntry entities whose expiration_ts is in the past."""
     now = utils.utcnow()
     expired = model.ContentEntry.query(model.ContentEntry.expiration_ts < now)
     expired_keys = expired.iter(keys_only=True)
     total = incremental_delete(
         expired_keys, delete=model.delete_entry_and_gs_entry)
     logging.info('Deleting %s expired entries', total)
Example #33
0
def bot_event(event_type, bot_id, external_ip, dimensions, state, version,
              quarantined, task_id, task_name, **kwargs):
    """Refreshes the bot's BotInfo and, for most events, stores a BotEvent.

    Nothing is done when bot_id is empty.

    Arguments:
    - event_type: event type.
    - bot_id: bot id.
    - external_ip: IP address as seen by the HTTP handler.
    - dimensions: Bot's dimensions as self-reported. If not provided, keep
          previous value.
    - state: ephemeral state of the bot. It is expected to change constantly.
          If not provided, keep previous value.
    - version: swarming_bot.zip version as self-reported. Used to spot if a bot
          failed to update promptly. If None, keep previous value.
    - quarantined: bool to determine if the bot was declared quarantined. If
          None, keep previous value.
    - task_id: packed task id if relevant. Set to '' to zap the stored value.
          If None, keep previous value.
    - task_name: task name if relevant. If not provided, keep previous value.
    - kwargs: optional values to add to BotEvent relevant to event_type.
    """
    if not bot_id:
        return

    # Load the stored BotInfo, or start a fresh one, then refresh its fields.
    key = get_info_key(bot_id)
    info = key.get()
    if not info:
        info = BotInfo(key=key)
    info.last_seen_ts = utils.utcnow()
    info.external_ip = external_ip
    if dimensions:
        info.dimensions = dimensions
    if state:
        info.state = state
    if quarantined is not None:
        info.quarantined = quarantined
    if task_id is not None:
        info.task_id = task_id
    if task_name:
        info.task_name = task_name
    if version is not None:
        info.version = version

    info_only_events = ('request_sleep', 'task_update')
    if event_type in info_only_events:
        # These events are too frequent to deserve a BotEvent of their own;
        # only BotInfo is refreshed. The entity was fetched solely to keep its
        # existing values, and since no BotEvent is added no transaction is
        # needed.
        info.put()
        return

    new_event = BotEvent(
        parent=get_root_key(bot_id),
        event_type=event_type,
        external_ip=external_ip,
        dimensions=info.dimensions,
        quarantined=info.quarantined,
        state=info.state,
        task_id=info.task_id,
        version=info.version,
        **kwargs)

    task_finished_events = ('task_completed', 'task_error')
    if event_type in task_finished_events:
        # The event keeps the task_id (it was copied above); the summary
        # entity must not.
        info.task_id = ''

    datastore_utils.store_new_version(new_event, BotRoot, [info])
Example #34
0
def _update_ip_whitelist_config(root, rev, conf):
    """Brings stored IP whitelists and assignments in line with `conf`.

    Must be called inside a transaction (asserted). Whitelists present in the
    config are created or updated, stored whitelists absent from the config are
    deleted, and the assignments entity is rewritten when its set of
    (identity, whitelist) pairs differs from the config.

    Arguments:
      root: model.AuthGlobalConfig instance (only type-checked here).
      rev: object whose `revision` attribute is embedded in comments.
      conf: config object exposing `ip_whitelists` and `assignments`.

    Returns:
      True if anything was stored or deleted, False if nothing changed.
    """
    assert ndb.in_transaction(), 'Must be called in AuthDB transaction'
    assert isinstance(root, model.AuthGlobalConfig), root
    now = utils.utcnow()

    # Whitelists currently stored, keyed by entity id.
    stored = {}
    for entity in model.AuthIPWhitelist.query(ancestor=model.root_key()):
        stored[entity.key.id()] = entity

    # Whitelists described by the config (name => [list of subnets]).
    imported = _resolve_ip_whitelist_includes(conf.ip_whitelists)

    # Whitelists that are new or whose subnets changed.
    to_put = []
    for name, subnets in imported.items():
        entity = stored.get(name)
        if not entity:
            entity = model.AuthIPWhitelist(
                key=model.ip_whitelist_key(name),
                created_ts=now,
                created_by=model.get_service_self_identity())
        elif entity.subnets == subnets:
            # Already stored with identical subnets, nothing to do.
            continue
        # An existing entity is updated in place (to preserve
        # auth_db_prev_rev).
        entity.subnets = subnets
        entity.description = 'Imported from ip_whitelist.cfg'
        to_put.append(entity)

    # Whitelists that disappeared from the config.
    to_delete = [
        entity for entity in stored.values()
        if entity.key.id() not in imported
    ]

    # Assignments: entries that already exist are reused as-is so their
    # created_ts and created_by are left untouched.
    holder = model.ip_whitelist_assignments_key().get()
    if not holder:
        holder = model.AuthIPWhitelistAssignments(
            key=model.ip_whitelist_assignments_key())
    known = {}
    for assignment in holder.assignments:
        pair = (assignment.identity.to_bytes(), assignment.ip_whitelist)
        known[pair] = assignment

    merged = []
    for item in conf.assignments:
        pair = (item.identity, item.ip_whitelist_name)
        if pair not in known:
            merged.append(
                model.AuthIPWhitelistAssignments.Assignment(
                    identity=model.Identity.from_bytes(item.identity),
                    ip_whitelist=item.ip_whitelist_name,
                    comment='Imported from ip_whitelist.cfg at rev %s' %
                    rev.revision,
                    created_ts=now,
                    created_by=model.get_service_self_identity()))
            continue
        merged.append(known[pair])

    # Rewrite the assignments entity only when the set of pairs changed.
    merged_pairs = set(
        (a.identity.to_bytes(), a.ip_whitelist) for a in merged)
    if merged_pairs != set(known):
        holder.assignments = merged
        to_put.append(holder)

    if not (to_put or to_delete):
        return False

    comment = 'Importing ip_whitelist.cfg at rev %s' % rev.revision
    for entity in to_put:
        entity.record_revision(
            modified_by=model.get_service_self_identity(),
            modified_ts=now,
            comment=comment)
    for entity in to_delete:
        entity.record_deletion(
            modified_by=model.get_service_self_identity(),
            modified_ts=now,
            comment=comment)

    # Issue all writes and deletes concurrently, then wait for each of them.
    pending = list(ndb.put_multi_async(to_put))
    pending += ndb.delete_multi_async(e.key for e in to_delete)
    for future in pending:
        future.check_success()
    return True
Example #35
0
 def ip_whitelist(name, **kwargs):
     """Returns an unsaved AuthIPWhitelist named `name`, timestamped with now.

     Extra keyword arguments are forwarded to the model constructor.
     """
     key = model.ip_whitelist_key(name)
     created = utils.utcnow()
     modified = utils.utcnow()
     return model.AuthIPWhitelist(
         key=key, created_ts=created, modified_ts=modified, **kwargs)
Example #36
0
def yield_expired_task_to_run():
  """Yields every TaskToRun with queue_number > 0 that has already expired.

  The expiration check is done in Python against a single timestamp taken
  before the query is iterated.
  """
  cutoff = utils.utcnow()
  still_queued = TaskToRun.query().filter(TaskToRun.queue_number > 0)
  for candidate in still_queued:
    if not candidate.expiration_ts < cutoff:
      continue
    yield candidate
Example #37
0
def cron_trigger_tasks(table_name, baseurl, task_name, max_seconds,
                       max_taskqueues):
    """Triggers tasks to send rows to BigQuery via time based slicing.

    It triggers one task queue task per 1 minute slice of time to process. It
    will process up to 2 minutes before now, and up to _OLDEST_BACKFILL time
    ago. It tries to go both ways, both keeping up with new items, and
    backfilling.

    This function is expected to be called once per minute.

    This function stores in BqState the timestamps of last enqueued events.
    The state is saved after every successful enqueue so that progress is kept
    even if a later iteration fails.

    Arguments:
      table_name: BigQuery table name. Also used as the key id to use for the
          BqState entity.
      baseurl: url for the task queue, which the timestamp will be appended to.
      task_name: task name the URL represents.
      max_seconds: the maximum amount of time to run; after which it should
          stop early even if there is still work to do.
      max_taskqueues: the maximum number of task queue tasks triggered; to
          limit parallel execution.

    Returns:
      total number of task queue tasks triggered.
    """
    RECENT_OFFSET = datetime.timedelta(seconds=120)
    minute = datetime.timedelta(seconds=60)

    start = utils.utcnow()
    # Truncate to the minute: keep only (year, month, day, hour, minute).
    start_rounded = datetime.datetime(*start.timetuple()[:5])
    recent_cutoff = start_rounded - RECENT_OFFSET
    oldest_cutoff = start_rounded - _OLDEST_BACKFILL

    total = 0
    state = BqState.get_by_id(table_name)
    if not state or not state.oldest:
        # Flush the previous state, especially if it was the deprecated way,
        # and start over.
        state = BqState(id=table_name,
                        ts=start,
                        oldest=recent_cutoff - minute,
                        recent=recent_cutoff)
        state.put()

    # First trigger recent row(s).
    while total < max_taskqueues:
        if (state.recent >= recent_cutoff
                or (utils.utcnow() - start).total_seconds() >= max_seconds):
            break
        t = state.recent.strftime(u'%Y-%m-%dT%H:%M')
        if not utils.enqueue_task(baseurl + t, task_name):
            # Report which time slice could not be enqueued.
            logging.warning('Enqueue for %s failed', t)
            break
        state.recent += minute
        state.ts = utils.utcnow()
        state.put()
        total += 1

    # Then trigger for backfill of old rows.
    while total < max_taskqueues:
        if (state.oldest <= oldest_cutoff
                or (utils.utcnow() - start).total_seconds() >= max_seconds):
            break
        t = state.oldest.strftime(u'%Y-%m-%dT%H:%M')
        if not utils.enqueue_task(baseurl + t, task_name):
            # Report which time slice could not be enqueued.
            logging.warning('Enqueue for %s failed', t)
            break
        state.oldest -= minute
        state.ts = utils.utcnow()
        state.put()
        total += 1

    logging.info('Triggered %d tasks for %s', total, table_name)
    return total
Example #38
0
 def group(name, **kwargs):
     """Returns an unsaved AuthGroup named `name`, timestamped with now.

     Extra keyword arguments are forwarded to the model constructor.
     """
     key = model.group_key(name)
     created = utils.utcnow()
     modified = utils.utcnow()
     return model.AuthGroup(
         key=key, created_ts=created, modified_ts=modified, **kwargs)
Example #39
0
def update_build_async(req, _res, ctx, _mask):
    """Updates the build as described by the given request.

    Depending on req.update_mask.paths this may write build steps, build
    output properties and any other masked build.proto field (including
    status).

    This is a generator: it yields futures and is presumably wrapped as an
    ndb tasklet by a decorator not visible here -- confirm against the caller.

    Does not mutate res.
    In practice, clients does not need the response, they just want to provide
    the data.

    Arguments:
      req: the update request; req.build and req.update_mask are read.
      _res: unused.
      ctx: passed to validate_build_token() together with the loaded build.
      _mask: unused.

    Raises:
      StatusError with PERMISSION_DENIED if the caller may not update builds.
      Whatever not_found(), failed_precondition() and invalid_argument()
      produce when the build is missing, already ended, or is SCHEDULED while
      steps/output are being updated.
    """
    # Captured once so start_time/end_time written below share one timestamp.
    now = utils.utcnow()
    logging.debug('updating build %d', req.build.id)

    # Validate the request.
    build_steps = model.BuildSteps.make(req.build)
    validation.validate_update_build_request(req, build_steps)

    update_paths = set(req.update_mask.paths)

    if not (yield user.can_update_build_async()):
        raise StatusError(
            prpc.StatusCode.PERMISSION_DENIED,
            '%s not permitted to update build' %
            auth.get_current_identity().to_bytes())

    @ndb.tasklet
    def get_async():
        # Loads the build and checks that it can accept this update. Called
        # once before the transaction and once again inside it.
        build = yield model.Build.get_by_id_async(req.build.id)
        if not build:
            raise not_found('Cannot update nonexisting build with id %s',
                            req.build.id)
        if build.is_ended:
            raise failed_precondition('Cannot update an ended build')

        # Ensure a SCHEDULED build does not have steps or output.
        # The status considered is the one the build will have after the
        # update: the requested one if it is in the mask, else the stored one.
        final_status = (req.build.status if 'build.status' in update_paths else
                        build.proto.status)
        if final_status == common_pb2.SCHEDULED:
            if 'build.steps' in update_paths:
                raise invalid_argument(
                    'cannot update steps of a SCHEDULED build; '
                    'either set status to non-SCHEDULED or do not update steps'
                )
            if any(p.startswith('build.output.') for p in update_paths):
                raise invalid_argument(
                    'cannot update build output fields of a SCHEDULED build; '
                    'either set status to non-SCHEDULED or do not update build output'
                )

        raise ndb.Return(build)

    # First load, outside of the transaction, to validate the build token.
    build = yield get_async()
    validate_build_token(build, ctx)

    # Prepare a field mask to merge req.build into model.Build.proto.
    # Exclude fields that are stored elsewhere.
    # Note that update_paths was (indirectly) validated by validation.py
    # against a whitelist.
    model_build_proto_mask = protoutil.Mask.from_field_mask(
        field_mask_pb2.FieldMask(
            paths=list(update_paths -
                       {'build.steps', 'build.output.properties'})),
        rpc_pb2.UpdateBuildRequest.DESCRIPTOR,
        update_mask=True,
    ).submask('build')

    # Serialized before the transaction starts; stored by txn_async() only if
    # 'build.output.properties' is in the update mask.
    out_prop_bytes = req.build.output.properties.SerializeToString()

    @ndb.transactional_tasklet
    def txn_async():
        # Re-fetch (and re-check) the build inside the transaction.
        build = yield get_async()

        orig_status = build.status

        # Futures of all the writes/notifications started below; they are
        # yielded together at the end.
        futures = []

        if 'build.output.properties' in update_paths:
            futures.append(
                model.BuildOutputProperties(
                    key=model.BuildOutputProperties.key_for(build.key),
                    properties=out_prop_bytes,
                ).put_async())

        if model_build_proto_mask:
            # Merge the rest into build.proto using model_build_proto_mask.
            model_build_proto_mask.merge(req.build, build.proto)

        # If we are updating build status, update some other dependent fields
        # and schedule notifications.
        status_changed = orig_status != build.proto.status
        if status_changed:
            if build.proto.status == common_pb2.STARTED:
                if not build.proto.HasField('start_time'):  # pragma: no branch
                    build.proto.start_time.FromDatetime(now)
                futures.append(events.on_build_starting_async(build))
            else:
                # The only other status change accepted here is to a terminal
                # status.
                assert model.is_terminal_status(
                    build.proto.status), build.proto.status
                build.clear_lease()
                if not build.proto.HasField('end_time'):  # pragma: no branch
                    build.proto.end_time.FromDatetime(now)
                futures.append(events.on_build_completing_async(build))

        if 'build.steps' in update_paths:
            # TODO(crbug.com/936892): reject requests with a terminal build status
            # and incomplete steps, when
            # https://chromium-review.googlesource.com/c/infra/infra/+/1553291
            # is deployed.
            futures.append(build_steps.put_async())
        elif build.is_ended:
            # Steps were not part of this update but the build has ended:
            # cancel whatever steps are still incomplete.
            futures.append(
                model.BuildSteps.cancel_incomplete_steps_async(
                    req.build.id, build.proto.end_time))

        futures.append(build.put_async())
        yield futures
        raise ndb.Return(build, status_changed)

    build, status_changed = yield txn_async()
    # Post-transaction notifications, only when the status actually changed.
    if status_changed:
        if build.proto.status == common_pb2.STARTED:
            events.on_build_started(build)
        else:
            assert model.is_terminal_status(
                build.proto.status), build.proto.status
            events.on_build_completed(build)
Example #40
0
def bot_update_task(
    run_result_key, bot_id, output, output_chunk_start,
    exit_code, duration, hard_timeout, io_timeout, cost_usd, outputs_ref):
  """Updates a TaskRunResult and TaskResultSummary, along TaskOutput.

  Arguments:
  - run_result_key: ndb.Key to TaskRunResult.
  - bot_id: Self advertised bot id to ensure it's the one expected.
  - output: Data to append to this command output.
  - output_chunk_start: Index of output in the stdout stream.
  - exit_code: Mark that this command is terminated.
  - duration: Time spent in seconds for this command.
  - hard_timeout: Bool set if an hard timeout occurred.
  - io_timeout: Bool set if an I/O timeout occurred.
  - cost_usd: Cost in $USD of this task up to now. None is treated as 0.
  - outputs_ref: Serialized FilesRef instance or None.

  Invalid states, these are flat out refused:
  - A command is updated after it had an exit code assigned to.

  Returns:
    tuple(bool, bool); first is if the update succeeded, second is if the task
    completed. (False, False) is returned when the transaction failed to
    commit. A refused update is logged as an error and reported as
    (True, False).

  Raises:
    ValueError: if cost_usd is negative.
  """
  assert output_chunk_start is None or isinstance(output_chunk_start, int)
  assert output is None or isinstance(output, str)
  if cost_usd is not None and cost_usd < 0.:
    raise ValueError('cost_usd must be None or greater or equal than 0')

  result_summary_key = task_pack.run_result_key_to_result_summary_key(
      run_result_key)
  request_key = task_pack.result_summary_key_to_request_key(result_summary_key)
  # Start fetching the request, do some local work, then wait for it.
  request_future = request_key.get_async()
  server_version = utils.get_app_version()
  packed = task_pack.pack_run_result_key(run_result_key)
  request = request_future.get_result()
  now = utils.utcnow()

  def run():
    """Returns tuple(run_result or None, task_completed, error or None)."""
    # 2 consecutive GETs, one PUT.
    run_result_future = run_result_key.get_async()
    result_summary_future = result_summary_key.get_async()
    run_result = run_result_future.get_result()
    # On every early return below, the pending summary GET is waited on first.
    if not run_result:
      result_summary_future.wait()
      return None, False, 'is missing'

    if run_result.bot_id != bot_id:
      result_summary_future.wait()
      return None, False, 'expected bot (%s) but had update from bot %s' % (
          run_result.bot_id, bot_id)

    # This happens as an HTTP request is retried when the DB write succeeded but
    # it still returned HTTP 500.
    if len(run_result.exit_codes) and exit_code is not None:
      if run_result.exit_codes[0] != exit_code:
        result_summary_future.wait()
        return None, False, 'got 2 different exit_codes; %d then %d' % (
            run_result.exit_codes[0], exit_code)

    # duration and exit_code must be provided together or not at all.
    if (duration is None) != (exit_code is None):
      result_summary_future.wait()
      return None, False, (
          'had unexpected duration; expected iff a command completes; index %d'
          % len(run_result.exit_codes))

    if exit_code is not None:
      # The command completed.
      run_result.durations.append(duration)
      run_result.exit_codes.append(exit_code)

    if outputs_ref:
      run_result.outputs_ref = task_request.FilesRef(**outputs_ref)

    task_completed = len(run_result.exit_codes) == 1
    if run_result.state in task_result.State.STATES_RUNNING:
      if hard_timeout or io_timeout:
        run_result.state = task_result.State.TIMED_OUT
        run_result.completed_ts = now
      elif task_completed:
        run_result.state = task_result.State.COMPLETED
        run_result.completed_ts = now

    run_result.signal_server_version(server_version)
    to_put = [run_result]
    if output:
      # This does 1 multi GETs. This also modifies run_result in place.
      to_put.extend(
          run_result.append_output(0, output, output_chunk_start or 0))

    # cost_usd may legitimately be None; normalize it so the comparison does
    # not depend on how None orders against floats. The stored cost never
    # decreases.
    run_result.cost_usd = max(cost_usd or 0., run_result.cost_usd or 0.)
    run_result.modified_ts = now

    result_summary = result_summary_future.get_result()
    if (result_summary.try_number and
        result_summary.try_number > run_result.try_number):
      # The situation where a shard is retried but the bot running the previous
      # try somehow reappears and reports success, the result must still show
      # the last try's result. We still need to update cost_usd manually.
      result_summary.costs_usd[run_result.try_number-1] = run_result.cost_usd
      result_summary.modified_ts = now
    else:
      result_summary.set_from_run_result(run_result, request)

    to_put.append(result_summary)
    ndb.put_multi(to_put)
    return run_result, task_completed, None

  try:
    run_result, task_completed, error = datastore_utils.transaction(run)
  except datastore_utils.CommitError:
    # It is important that the caller correctly surface this error.
    return False, False

  if run_result:
    _update_stats(run_result, bot_id, request, task_completed)
  if error:
    logging.error('Task %s %s', packed, error)
  return True, task_completed
Example #41
0
def schedule_request(request):
  """Creates and stores all the entities to schedule a new task request.

  The number of entities created is 3: TaskRequest, TaskResultSummary and
  TaskToRun.

  The TaskRequest is saved first as a DB transaction, then TaskResultSummary and
  TaskToRun are saved as a single DB RPC. The Search index is also updated
  in-between.

  NOTE(review): this function itself only stores TaskResultSummary and
  TaskToRun (plus the search document and, if any, the parent task entities);
  `request` is presumably already stored by the caller -- confirm.

  If request.properties.idempotent is set and a recent enough
  TaskResultSummary with the same properties_hash exists, its results are
  copied into the new summary and the TaskToRun is taken out of scheduling.

  Arguments:
  - request: is in the TaskRequest entity saved in the DB.

  Returns:
    TaskResultSummary. TaskToRun is not returned.
  """
  dupe_future = None
  if request.properties.idempotent:
    # Find a previously run task that is also idempotent and completed. Start a
    # query to fetch items that can be used to dedupe the task. See the comment
    # for this property for more details.
    #
    # Do not use "cls.created_ts > oldest" here because this would require a
    # composite index. It's unnecessary because TaskRequest.key is mostly
    # equivalent to decreasing TaskRequest.created_ts, ordering by key works as
    # well and doesn't require a composite index.
    cls = task_result.TaskResultSummary
    h = request.properties.properties_hash
    dupe_future = cls.query(cls.properties_hash==h).order(cls.key).get_async()

  # At this point, the request is now in the DB but not yet in a mode where it
  # can be triggered or visible. Index it right away so it is searchable. If any
  # of remaining calls in this function fail, the TaskRequest and Search
  # Document will simply point to an incomplete task, which will be ignored.
  #
  # Creates the entities TaskToRun and TaskResultSummary but do not save them
  # yet. TaskRunResult will be created once a bot starts it.
  task = task_to_run.new_task_to_run(request)
  result_summary = task_result.new_result_summary(request)

  # Do not specify a doc_id, as they are guaranteed to be monotonically
  # increasing and searches are done in reverse order, which fits exactly the
  # created_ts ordering. This is useful because DateField is precise to the date
  # (!) and NumberField is signed 32 bits so the best it could do with EPOCH is
  # second resolution up to year 2038.
  index = search.Index(name='requests')
  packed = task_pack.pack_result_summary_key(result_summary.key)
  doc = search.Document(
      fields=[
        search.TextField(name='name', value=request.name),
        search.AtomField(name='id', value=packed),
      ])
  # Even if it fails here, we're still fine, as the task is not "alive" yet.
  search_future = index.put_async([doc])

  now = utils.utcnow()

  if dupe_future:
    # Reuse the results!
    dupe_summary = dupe_future.get_result()
    # Refuse tasks older than X days. This is due to the isolate server dropping
    # files. https://code.google.com/p/swarming/issues/detail?id=197
    oldest = now - datetime.timedelta(
        seconds=config.settings().reusable_task_age_secs)
    if dupe_summary and dupe_summary.created_ts > oldest:
      # If there's a bug, commenting out this block is sufficient to disable the
      # functionality.
      # Setting task.queue_number to None removes it from the scheduling.
      task.queue_number = None
      # 'created_ts', 'name' and 'user' are passed to _copy_entity() so the new
      # summary presumably keeps its own values for them -- TODO confirm
      # against _copy_entity().
      _copy_entity(dupe_summary, result_summary, ('created_ts', 'name', 'user'))
      result_summary.properties_hash = None
      result_summary.try_number = 0
      # cost_usd is read before costs_usd is cleared right below.
      result_summary.cost_saved_usd = result_summary.cost_usd
      # Only zap after.
      result_summary.costs_usd = []
      result_summary.deduped_from = task_pack.pack_run_result_key(
          dupe_summary.run_result_key)

  # Get parent task details if applicable.
  parent_task_keys = None
  if request.parent_task_id:
    # Both the parent's TaskRunResult and its TaskResultSummary get updated.
    parent_run_key = task_pack.unpack_run_result_key(request.parent_task_id)
    parent_task_keys = [
      parent_run_key,
      task_pack.run_result_key_to_result_summary_key(parent_run_key),
    ]

  result_summary.modified_ts = now

  # Storing these entities makes this task live. It is important at this point
  # that the HTTP handler returns as fast as possible, otherwise the task will
  # be run but the client will not know about it.
  def run():
    ndb.put_multi([result_summary, task])

  def run_parent():
    # This one is slower.
    # Records the new task id as a child on each parent entity.
    items = ndb.get_multi(parent_task_keys)
    k = result_summary.task_id
    for item in items:
      item.children_task_ids.append(k)
      item.modified_ts = now
    ndb.put_multi(items)

  # Raising will abort to the caller.
  # Both transactions are started here and only waited on further below.
  futures = [datastore_utils.transaction_async(run)]
  if parent_task_keys:
    futures.append(datastore_utils.transaction_async(run_parent))

  try:
    search_future.get_result()
  except search.Error:
    # Do not abort the task, for now search is best effort.
    logging.exception('Put failed')

  for future in futures:
    # Check for failures, it would raise in this case, aborting the call.
    future.get_result()

  stats.add_task_entry(
      'task_enqueued', result_summary.key,
      dimensions=request.properties.dimensions,
      user=request.user)
  return result_summary
Example #42
0
def _handle_dead_bot(run_result_key):
  """Handles TaskRunResult where its bot has stopped showing sign of life.

  Transactionally updates the entities depending on the state of this task. The
  task may be retried automatically, canceled or left alone.

  Arguments:
  - run_result_key: ndb.Key to the TaskRunResult to handle.

  Returns:
    True if the task was retried, False if the task was killed, None if no
    action was done (the run result was not RUNNING anymore or the transaction
    failed to commit).
  """
  result_summary_key = task_pack.run_result_key_to_result_summary_key(
      run_result_key)
  request_key = task_pack.result_summary_key_to_request_key(result_summary_key)
  # Start fetching the request, do some local work, then wait for it.
  request_future = request_key.get_async()
  now = utils.utcnow()
  server_version = utils.get_app_version()
  packed = task_pack.pack_run_result_key(run_result_key)
  request = request_future.get_result()
  to_run_key = task_to_run.request_to_task_to_run_key(request)

  def run():
    """Returns tuple(Result, bot_id)."""
    # Do one GET, one PUT at the end.
    run_result, result_summary, to_run = ndb.get_multi(
        (run_result_key, result_summary_key, to_run_key))
    if run_result.state != task_result.State.RUNNING:
      # It was updated already or not updating last. Likely DB index was stale.
      return None, run_result.bot_id

    run_result.signal_server_version(server_version)
    run_result.modified_ts = now
    if result_summary.try_number != run_result.try_number:
      # Not updating correct run_result, cancel it without touching
      # result_summary.
      to_put = (run_result,)
      run_result.state = task_result.State.BOT_DIED
      run_result.internal_failure = True
      run_result.abandoned_ts = now
      result = False
    elif result_summary.try_number == 1 and now < request.expiration_ts:
      # Retry it.
      to_put = (run_result, result_summary, to_run)
      to_run.queue_number = task_to_run.gen_queue_number(request)
      run_result.state = task_result.State.BOT_DIED
      run_result.internal_failure = True
      run_result.abandoned_ts = now
      # Do not sync data from run_result to result_summary, since the task is
      # being retried.
      result_summary.reset_to_pending()
      result_summary.modified_ts = now
      result = True
    else:
      # Cancel it, there was more than one try or the task expired in the
      # meantime.
      to_put = (run_result, result_summary)
      run_result.state = task_result.State.BOT_DIED
      run_result.internal_failure = True
      run_result.abandoned_ts = now
      result_summary.set_from_run_result(run_result, request)
      result = False
    ndb.put_multi(to_put)
    return result, run_result.bot_id

  try:
    success, bot_id = datastore_utils.transaction(run)
  except datastore_utils.CommitError:
    # Best effort: a failed commit is reported as "no action was done".
    success, bot_id = None, None
  if success is not None:
    task_to_run.set_lookup_cache(to_run_key, success)
    if not success:
      # bot_id is run_result.bot_id as returned by run(); pass it whole rather
      # than indexing into it, which would only keep its first character.
      stats.add_run_entry(
          'run_bot_died', run_result_key,
          bot_id=bot_id,
          dimensions=request.properties.dimensions,
          user=request.user)
    else:
      logging.info('Retried %s', packed)
  else:
    logging.info('Ignored %s', packed)
  return success
Example #43
0
 def tag_entries(entries, namespace):
   """Enqueues a 'tag' task carrying the digests of the given entries.

   The task URL embeds the namespace and the current timestamp; the payload is
   the concatenation of every entry's digest, decoded from hex.

   Returns whatever utils.enqueue_task() returns.
   """
   timestamp = utils.datetime_to_timestamp(utils.utcnow())
   url = '/internal/taskqueue/tag/%s/%s' % (namespace, timestamp)
   raw_digests = [binascii.unhexlify(entry.digest) for entry in entries]
   return utils.enqueue_task(url, 'tag', payload=''.join(raw_digests))
Example #44
0
def yield_next_available_task_to_dispatch(bot_dimensions, deadline):
    """Generates candidate (TaskRequest, TaskToRun) pairs for a bot.

    Pairs are produced in decreasing order of priority. When the caller decides
    that a yielded task is suitable, it must call reap_task_to_run(task.key) so
    the task is not scheduled anymore. Resuming the generator after a yield
    means the reaping did not succeed; the pair is then counted as ignored.

    Performance is the top most priority here.

    Arguments:
    - bot_dimensions: dimensions (as a dict) defined by the bot that can be
        matched. Its 'id' entry must hold exactly one value.
    - deadline: UTC timestamp (as an int) that the bot must be able to
        complete the task by. None if there is no such deadline.
    """
    assert len(bot_dimensions['id']) == 1, bot_dimensions
    started = utils.utcnow()
    stats = _QueryStats()
    stats.deadline = deadline
    bot_id = bot_dimensions[u'id'][0]
    # Validations in flight, oldest first.
    pending = collections.deque()
    try:
        for candidate in _yield_potential_tasks(bot_id):
            elapsed = (utils.utcnow() - started).total_seconds()
            if elapsed > 40.:
                # Give up once the search took too long. The current handlers
                # are given 60s to complete; capping the search at 40s leaves
                # 20s to reap the task and finish the HTTP request. Blowing up
                # right after a successful reap would leave a dangling task
                # request in limbo until the cron job reaps and retries it.
                return
            pending.append(
                _validate_task_async(bot_dimensions, deadline, stats, started,
                                     candidate))
            while pending:
                # Results are consumed strictly in FIFO order.
                oldest = pending[0]
                if oldest.done():
                    request, task = oldest.get_result()
                    if request:
                        yield request, task
                        # Still running, so the task reaping wasn't successful.
                        # This includes expired tasks, which is a bit odd but
                        # not a big deal.
                        stats.ignored += 1
                    pending.popleft()
                # Don't batch too much.
                if len(pending) < 50:
                    break
                pending[0].wait()

        # The source of candidates is exhausted; drain what is still pending.
        while pending:
            request, task = pending[0].get_result()
            if request:
                yield request, task
                # Same as above: the reaping wasn't successful (or the task
                # expired).
                stats.ignored += 1
            pending.popleft()
    finally:
        # Avoid leaving stray RPCs behind as much as possible, they can mess up
        # the following HTTP handlers.
        ndb.Future.wait_all(pending)
        # The stats are a bit misleading here, as many _validate_task_async()
        # calls could have been started yet never yielded.
        logging.debug('yield_next_available_task_to_dispatch(%s) in %.3fs: %s',
                      bot_id, (utils.utcnow() - started).total_seconds(),
                      stats)
Example #45
0
    def test_works(self):
        """Pushes a snapshot into a replica's AuthDB and verifies the outcome.

        The datastore is seeded with entities named 'modify', 'delete' and
        'keep', plus one secret in the 'local' scope. The pushed snapshot
        carries 'new', 'modify' and 'keep' entities. After replace_auth_db()
        the stored AuthDB must match the snapshot (no 'delete' entities), and
        the local secret must be left untouched.
        """
        frozen_now = datetime.datetime(2014, 1, 1, 1, 1, 1)
        self.mock_now(frozen_now)
        self.configure_as_replica(0)

        # Factories for the entities used both for seeding and in the snapshot.
        def make_group(name, **props):
            return model.AuthGroup(
                key=model.group_key(name),
                created_ts=utils.utcnow(),
                modified_ts=utils.utcnow(),
                **props)

        def make_secret(name, scope, **props):
            return model.AuthSecret(
                id=name, parent=model.secret_scope_key(scope), **props)

        def make_ip_whitelist(name, **props):
            return model.AuthIPWhitelist(
                key=model.ip_whitelist_key(name),
                created_ts=utils.utcnow(),
                modified_ts=utils.utcnow(),
                **props)

        def make_assignment(ident, whitelist_name):
            return model.AuthIPWhitelistAssignments.Assignment(
                identity=model.Identity.from_bytes(ident),
                ip_whitelist=whitelist_name,
                created_ts=utils.utcnow(),
                comment='comment')

        # Seed the datastore with the initial auth db state.
        model.AuthGlobalConfig(
            key=model.root_key(),
            modified_ts=utils.utcnow(),
            oauth_client_id='oauth_client_id',
            oauth_client_secret='oauth_client_secret',
            oauth_additional_client_ids=['a', 'b']).put()

        for group_name in ('Modify', 'Delete', 'Keep'):
            make_group(group_name).put()

        seeded_secrets = (
            ('modify', 'global'),
            ('delete', 'global'),
            ('keep', 'global'),
            ('local', 'local'),
        )
        for secret_name, scope in seeded_secrets:
            make_secret(secret_name, scope).put()

        for whitelist_name in ('modify', 'delete', 'keep'):
            make_ip_whitelist(whitelist_name).put()

        model.AuthIPWhitelistAssignments(
            key=model.ip_whitelist_assignments_key(),
            modified_ts=utils.utcnow(),
            assignments=[
                make_assignment('user:[email protected]', 'modify'),
                make_assignment('user:[email protected]', 'delete'),
                make_assignment('user:[email protected]', 'keep'),
            ]).put()

        # Assemble the snapshot piece by piece.
        snapshot_config = model.AuthGlobalConfig(
            key=model.root_key(),
            modified_ts=utils.utcnow(),
            oauth_client_id='another_oauth_client_id',
            oauth_client_secret='another_oauth_client_secret',
            oauth_additional_client_ids=[])
        snapshot_groups = [
            make_group('New'),
            make_group(
                'Modify', description='blah', owners='some-other-owners'),
            make_group('Keep'),
        ]
        snapshot_secrets = [
            make_secret('new', 'global'),
            make_secret('modify', 'global', values=['1234']),
            make_secret('keep', 'global'),
        ]
        snapshot_ip_whitelists = [
            make_ip_whitelist('new', subnets=['1.1.1.1/32']),
            make_ip_whitelist(
                'modify', subnets=['127.0.0.1/32', '192.168.0.1/32']),
            make_ip_whitelist('keep'),
        ]
        snapshot_assignments = model.AuthIPWhitelistAssignments(
            key=model.ip_whitelist_assignments_key(),
            assignments=[
                make_assignment('user:[email protected]', 'new'),
                make_assignment('user:[email protected]', 'modify'),
                make_assignment('user:[email protected]', 'keep'),
            ])
        snapshot = replication.AuthDBSnapshot(
            global_config=snapshot_config,
            groups=snapshot_groups,
            secrets=snapshot_secrets,
            ip_whitelists=snapshot_ip_whitelists,
            ip_whitelist_assignments=snapshot_assignments)

        # Push the snapshot.
        updated, state = replication.replace_auth_db(
            auth_db_rev=1234, modified_ts=frozen_now, snapshot=snapshot)
        self.assertTrue(updated)
        expected_state = {
            'auth_db_rev': 1234,
            'modified_ts': frozen_now,
            'primary_id': u'primary',
            'primary_url': u'https://primary',
        }
        self.assertEqual(expected_state, state.to_dict())

        # The stored replication state must agree with the returned one.
        current_state, current_snapshot = replication.new_auth_db_snapshot()
        self.assertEqual(expected_state, current_state.to_dict())

        # Builders for the expected dict form of each entity kind.
        root = ndb.Key('AuthGlobalConfig', 'root')
        global_scope = ndb.Key(
            'AuthGlobalConfig', 'root', 'AuthSecretScope', 'global')
        local_scope = ndb.Key(
            'AuthGlobalConfig', 'root', 'AuthSecretScope', 'local')

        def expected_group(name, description, owners):
            return {
                '__id__': name,
                '__parent__': root,
                'auth_db_rev': None,
                'auth_db_prev_rev': None,
                'created_by': None,
                'created_ts': frozen_now,
                'description': description,
                'globs': [],
                'members': [],
                'modified_by': None,
                'modified_ts': frozen_now,
                'nested': [],
                'owners': owners,
            }

        def expected_secret(name, scope_key, values):
            return {
                '__id__': name,
                '__parent__': scope_key,
                'modified_by': None,
                'modified_ts': frozen_now,
                'values': values,
            }

        def expected_ip_whitelist(name, subnets):
            return {
                '__id__': name,
                '__parent__': root,
                'auth_db_rev': None,
                'auth_db_prev_rev': None,
                'created_by': None,
                'created_ts': frozen_now,
                'description': u'',
                'modified_by': None,
                'modified_ts': frozen_now,
                'subnets': subnets,
            }

        def expected_assignment(whitelist_name):
            return {
                'comment': u'comment',
                'created_by': None,
                'created_ts': frozen_now,
                'identity': model.Identity(kind='user', name='*****@*****.**'),
                'ip_whitelist': whitelist_name,
            }

        # Verify expected Auth db state.
        expected_auth_db = {
            'global_config': {
                '__id__': 'root',
                '__parent__': None,
                'auth_db_rev': None,
                'auth_db_prev_rev': None,
                'modified_by': None,
                'modified_ts': frozen_now,
                'oauth_additional_client_ids': [],
                'oauth_client_id': u'another_oauth_client_id',
                'oauth_client_secret': u'another_oauth_client_secret'
            },
            'groups': [
                expected_group('Keep', u'', u'administrators'),
                expected_group('Modify', u'blah', u'some-other-owners'),
                expected_group('New', u'', u'administrators'),
            ],
            'secrets': [
                expected_secret('keep', global_scope, []),
                expected_secret('modify', global_scope, ['1234']),
                expected_secret('new', global_scope, []),
            ],
            'ip_whitelists': [
                expected_ip_whitelist('keep', []),
                expected_ip_whitelist(
                    'modify', [u'127.0.0.1/32', u'192.168.0.1/32']),
                expected_ip_whitelist('new', [u'1.1.1.1/32']),
            ],
            'ip_whitelist_assignments': {
                '__id__': 'default',
                '__parent__': root,
                'assignments': [
                    expected_assignment(u'new'),
                    expected_assignment(u'modify'),
                    expected_assignment(u'keep'),
                ],
                'auth_db_rev': None,
                'auth_db_prev_rev': None,
                'modified_by': None,
                'modified_ts': None,  # not transferred currently in proto
            },
        }
        self.assertEqual(expected_auth_db, snapshot_to_dict(current_snapshot))

        # Ensure local secret was left intact.
        local_secrets = model.AuthSecret.query(
            ancestor=model.secret_scope_key('local'))
        expected_local_secrets = [expected_secret('local', local_scope, [])]
        self.assertEqual(expected_local_secrets,
                         [entity_to_dict(s) for s in local_secrets])
Example #46
0
def build_to_message(build_bundle, include_lease_key=False):
  """Produces the legacy BuildMessage for the build in a model.BuildBundle.

  Args:
    build_bundle: model.BuildBundle. Its build must be set and must have a key
      with an id.
    include_lease_key: if True, the build's lease key is put into the message;
      otherwise the message's lease_key is None.

  Returns:
    A BuildMessage.
  """
  build = build_bundle.build
  assert build
  assert build.key
  assert build.key.id()

  proto = build.proto
  infra = build_bundle.infra.parse()
  swarming = infra.swarming
  logdog = infra.logdog
  recipe = infra.recipe
  summary = proto.summary_markdown

  # Result details: output properties, summary and bot dimensions.
  result_details = (build.result_details or {}).copy()
  result_details['properties'] = {}
  if build_bundle.output_properties:  # pragma: no branch
    output_props = build_bundle.output_properties.parse()
    result_details['properties'] = _properties_to_dict(output_props)
  if summary:
    result_details['ui'] = {'info': summary}
    # For a build that did not succeed the summary doubles as the error.
    if proto.status != common_pb2.SUCCESS:
      result_details['error'] = {
          'message': summary,
      }
  if swarming.bot_dimensions:
    values_by_key = {}
    for dim in swarming.bot_dimensions:
      if dim.key not in values_by_key:
        values_by_key[dim.key] = []
      values_by_key[dim.key].append(dim.value)
    swarming_details = result_details.setdefault('swarming', {})
    swarming_details['bot_dimensions'] = values_by_key

  # Parameters: stored ones plus the builder name and requested properties.
  parameters = (build.parameters or {}).copy()
  parameters[BUILDER_PARAMETER] = proto.builder.builder
  parameters[PROPERTIES_PARAMETER] = _properties_to_dict(
      infra.buildbucket.requested_properties
  )

  # A 'recipe' input property, when present, wins over the infra recipe name.
  recipe_name = recipe.name
  if build_bundle.input_properties:  # pragma: no cover
    input_props = build_bundle.input_properties.parse()
    if 'recipe' in input_props.fields:
      recipe_name = input_props['recipe']

  tags = set(build.tags)
  if build.is_luci:
    tags.update((
        'swarming_hostname:%s' % swarming.hostname,
        'swarming_task_id:%s' % swarming.task_id,
        # Milo uses swarming tags.
        'swarming_tag:recipe_name:%s' % recipe_name,
        'swarming_tag:recipe_package:%s' %
        (proto.exe.cipd_package or recipe.cipd_package),
        'swarming_tag:log_location:logdog://%s/%s/%s/+/annotations' %
        (logdog.hostname, logdog.project, logdog.prefix),
        'swarming_tag:luci_project:%s' % proto.builder.project,
    ))

    # Only the first 'os' dimension, if any, is turned into a tag.
    for dim in swarming.bot_dimensions:
      if dim.key != 'os':
        continue
      tags.add('swarming_tag:os:%s' % dim.value)
      break

  # This is not accurate, but it does not matter at this point.
  # This is deprecated.
  if build.canary:
    canary_preference = CanaryPreference.CANARY
  else:
    canary_preference = CanaryPreference.PROD

  msg = BuildMessage(
      id=build.key.id(),
      project=proto.builder.project,
      bucket=legacy_bucket_name(build.bucket_id, build.is_luci),
      tags=sorted(tags),
      parameters_json=json.dumps(parameters, sort_keys=True),
      status=build.status_legacy,
      result=build.result,
      result_details_json=json.dumps(result_details, sort_keys=True),
      cancelation_reason=build.cancelation_reason,
      failure_reason=build.failure_reason,
      lease_key=build.lease_key if include_lease_key else None,
      url=get_build_url(build),
      created_ts=proto_to_timestamp(proto.create_time),
      started_ts=proto_to_timestamp(proto.start_time),
      updated_ts=proto_to_timestamp(proto.update_time),
      completed_ts=proto_to_timestamp(proto.end_time),
      created_by=build.created_by.to_bytes() if build.created_by else None,
      status_changed_ts=utils.datetime_to_timestamp(build.status_changed_time),
      utcnow_ts=utils.datetime_to_timestamp(utils.utcnow()),
      retry_of=build.retry_of,
      canary_preference=canary_preference,
      canary=build.canary,
      experimental=build.experimental,
      service_account=swarming.task_service_account,
      # when changing this function, make sure build_to_dict would still work
  )

  lease_expiration = build.lease_expiration_date
  if lease_expiration is not None:
    msg.lease_expiration_ts = utils.datetime_to_timestamp(lease_expiration)
  return msg
Example #47
0
  def post(self, name):
    """Handles creation of a brand new entity; existing ones are never replaced.

    Aborts with HTTP 400 when the request body is malformed, its 'name' is
    missing or differs from |name|, or the entity is not writable. Aborts with
    HTTP 409 when the entity already exists or do_create() raises
    EntityOperationError, and with HTTP 400 when do_create() raises ValueError.
    Replies with HTTP 201 on success.

    Raises:
      api.AuthorizationError: if can_create() returns False.
    """
    self.check_preconditions()
    try:
      body = self.parse_body()
      body_name = body.pop('name', None)
      if not (body_name and body_name == name):
        raise ValueError('Missing or mismatching name in request body')
      if not self.is_entity_writable(name):
        raise ValueError('This %s is not writable' % self.entity_kind_title)
      entity = self.entity_kind.from_serializable_dict(
          serializable_dict=body,
          key=self.get_entity_key(name),
          created_ts=utils.utcnow(),
          created_by=api.get_current_identity())
    except (TypeError, ValueError) as e:
      self.abort_with_error(400, text=str(e))

    # No need to enter a transaction (like in do_update) to check this.
    if not self.can_create():
      caller = api.get_current_identity().to_bytes()
      raise api.AuthorizationError(
          '"%s" has no permission to create a %s' %
          (caller, self.entity_kind_title))

    @ndb.transactional
    def create_in_txn(new_entity):
      # Returns (True, None) on success, (False, <abort kwargs>) on failure.
      if new_entity.key.get():
        conflict = {
          'http_code': 409,
          'text': 'Such %s already exists' % self.entity_kind_title,
        }
        return False, conflict
      new_entity.record_revision(
          modified_by=api.get_current_identity(),
          modified_ts=utils.utcnow(),
          comment='REST API')
      try:
        self.do_create(new_entity)
      except EntityOperationError as exc:
        rejected = {
          'http_code': 409,
          'text': exc.message,
          'details': exc.details,
        }
        return False, rejected
      except ValueError as exc:
        invalid = {
          'http_code': 400,
          'text': str(exc),
        }
        return False, invalid
      model.replicate_auth_db()
      return True, None

    created, error_details = create_in_txn(entity)
    if not created:
      self.abort_with_error(**error_details)

    last_modified = utils.datetime_to_rfc2822(entity.modified_ts)
    location = '%s%s' % (self.entity_url_prefix, urllib.quote(entity.key.id()))
    self.send_response(
        response={'ok': True},
        http_code=201,
        headers={
          'Last-Modified': last_modified,
          'Location': location,
        }
    )
Example #48
0
def delegate_async(audience,
                   services,
                   min_validity_duration_sec=5 * 60,
                   max_validity_duration_sec=60 * 60 * 3,
                   impersonate=None,
                   tags=None,
                   token_server_url=None):
    """Obtains a delegation token, minting one via the token server if needed.

    A cached token is returned when memcache holds one that stays valid for at
    least min_validity_duration_sec; otherwise a new token is requested from
    the token server and memcached.

    Args:
        audience (list of (str or Identity)): to WHOM caller's identity is
            delegated; a list of identities or groups, a string "REQUESTOR"
            (to indicate the current service) or symbol '*' (which means ANY).
            Example: ['user:someone@example.com', 'group:abcdef', 'REQUESTOR'].
        services (list of (str or Identity)): WHERE the token is accepted.
            Each list element must be an identity of 'service' kind, a root
            URL of a service (e.g. 'https://....'), or symbol '*'.
            Example: ['service:gae-app1', 'https://gae-app2.appspot.com']
        min_validity_duration_sec (int): minimally acceptable lifetime of the
            token. If there's an existing token cached locally that has TTL of
            min_validity_duration_sec or more, it is returned right away.
            Default is 5 min.
        max_validity_duration_sec (int): defines the lifetime of a new token.
            It is set as the token's TTL if there's no cached token with a
            sufficiently long lifetime. Default is 3 hours.
        impersonate (str or Identity): a caller can mint a delegation token on
            someone else's behalf (effectively impersonating them). Only a
            privileged set of callers can do that. If impersonation is
            allowed, the token's delegated_identity field will contain
            whatever is in the 'impersonate' field.
            Example: 'user:someone@example.com'
        tags (list of str): optional list of key:value pairs to embed into the
            token. Services that accept the token may use them for additional
            authorization decisions.
        token_server_url (str): the URL of the token service that will mint
            the token. Defaults to the URL provided by the primary auth
            service.

    Returns:
        DelegationToken as ndb.Future.

    Raises:
        ValueError if args are invalid.
        TokenCreationError if could not create a token.
        TokenAuthorizationError on HTTP 403 response from auth service.
    """
    assert isinstance(audience, list), audience
    assert isinstance(services, list), services

    def as_str(ident):
        # Identity objects are serialized, anything else passes through as is.
        if isinstance(ident, model.Identity):
            return ident.to_bytes()
        return ident

    # Check and normalize the audience.
    if '*' in audience:
        audience = ['*']
    elif not audience:
        raise ValueError('audience can\'t be empty')
    else:
        for entry in audience:
            if isinstance(entry, model.Identity):
                continue  # identities are already validated
            if not isinstance(entry, basestring):
                raise ValueError('expecting a string or Identity')
            if entry != 'REQUESTOR' and not entry.startswith('group:'):
                # What remains must be a serialized identity. from_bytes may
                # raise ValueError.
                model.Identity.from_bytes(entry)
        audience = sorted(as_str(entry) for entry in audience)

    # Check and normalize the services.
    if '*' in services:
        services = ['*']
    elif not services:
        raise ValueError('services can\'t be empty')
    else:
        for svc in services:
            if isinstance(svc, basestring):
                if svc.startswith('https://'):
                    continue  # an URL, the token server knows how to handle it
                svc = model.Identity.from_bytes(svc)
            assert isinstance(svc, model.Identity), svc
            assert svc.kind == model.IDENTITY_SERVICE, svc
        services = sorted(as_str(svc) for svc in services)

    # Check the validity durations.
    assert isinstance(min_validity_duration_sec,
                      int), min_validity_duration_sec
    assert isinstance(max_validity_duration_sec,
                      int), max_validity_duration_sec
    assert min_validity_duration_sec >= 5
    assert max_validity_duration_sec >= 5
    assert min_validity_duration_sec <= max_validity_duration_sec

    # Check and normalize the impersonated identity.
    if impersonate is not None:
        assert isinstance(impersonate,
                          (basestring, model.Identity)), impersonate
        impersonate = as_str(impersonate)

    # Every tag must look like "<key>:<value>" with both parts non-empty.
    tags = sorted(tags or [])
    for tag in tags:
        pieces = tag.split(':', 1)
        if len(pieces) != 2 or not all(pieces):
            raise ValueError('Bad delegation token tag: %r' % tag)

    # Fall back to the token server configured in the auth db.
    token_server_url = (
        token_server_url or api.get_request_auth_db().token_server_url)
    if not token_server_url:
        raise exceptions.TokenCreationError(
            'Token server URL is not configured')

    # End of validation.

    # See MintDelegationTokenRequest in
    # https://github.com/luci/luci-go/blob/master/tokenserver/api/minter/v1/token_minter.proto.
    req = {
        'delegatedIdentity': impersonate or 'REQUESTOR',
        'validityDuration': max_validity_duration_sec,
        'audience': audience,
        'services': services,
        'tags': tags,
    }

    # The cache key covers both the token server and the full request.
    request_digest = hashlib.sha256(
        token_server_url + '\n' + json.dumps(req, sort_keys=True)).hexdigest()
    cache_key = 'delegation_token/v2/%s' % request_digest
    ctx = ndb.get_context()
    cached = yield ctx.memcache_get(cache_key)
    min_validity = datetime.timedelta(seconds=min_validity_duration_sec)
    now = utils.utcnow()
    if cached and now < cached.expiry - min_validity:
        logging.info('Fetched cached delegation token: fingerprint=%s',
                     utils.get_token_fingerprint(cached.token))
        raise ndb.Return(cached)

    # Nothing usable in the cache, mint a new token. Empty request fields are
    # left out of the log line.
    logging.info(
        'Minting a delegation token for %r',
        dict((k, v) for k, v in req.items() if v),
    )
    mint_url = ('%s/prpc/tokenserver.minter.TokenMinter/MintDelegationToken' %
                token_server_url)
    res = yield service_account.authenticated_request_async(
        mint_url, method='POST', payload=req)

    # Sanity check the response before trusting any of its fields.
    signed_token = res.get('token')
    if not signed_token or not isinstance(signed_token, basestring):
        logging.error('Bad MintDelegationToken response: %s', res)
        raise exceptions.TokenCreationError('Bad response, no token')

    subtoken = res.get('delegationSubtoken')
    if not subtoken or not isinstance(subtoken, dict):
        logging.error('Bad MintDelegationToken response: %s', res)
        raise exceptions.TokenCreationError(
            'Bad response, no delegationSubtoken')

    if subtoken.get('kind') != 'BEARER_DELEGATION_TOKEN':
        logging.error('Bad MintDelegationToken response: %s', res)
        raise exceptions.TokenCreationError(
            'Bad response, not BEARER_DELEGATION_TOKEN')

    actual_validity_sec = subtoken.get('validityDuration')
    if not isinstance(actual_validity_sec, (int, float)):
        logging.error('Bad MintDelegationToken response: %s', res)
        raise exceptions.TokenCreationError(
            'Unexpected response, validityDuration is absent or not a number')

    # The expiry is counted from |now|, which was taken before the request.
    minted = DelegationToken(
        token=str(signed_token),
        expiry=now + datetime.timedelta(seconds=actual_validity_sec),
    )

    service_version = res.get('serviceVersion')
    subtoken_id = subtoken.get('subtokenId')
    fingerprint = utils.get_token_fingerprint(minted.token)
    pretty_subtoken = json.dumps(
        res.get('delegationSubtoken'),
        sort_keys=True,
        indent=2,
        separators=(',', ': '))
    logging.info(
        'Token server "%s" generated token (subtoken_id=%s, fingerprint=%s):\n%s',
        service_version, subtoken_id, fingerprint, pretty_subtoken)

    # Put to cache. Refresh the token 10 sec in advance.
    if actual_validity_sec > 10:
        yield ctx.memcache_add(
            cache_key, minted, time=actual_validity_sec - 10)

    raise ndb.Return(minted)
Example #49
0
def launch_job(job_id):
    """Starts the mapreduce job registered under |job_id| in MAPREDUCE_JOBS.

    Settings missing from the job definition are filled in with defaults:
    256 shards, MAPREDUCE_TASK_QUEUE, the datastore input reader and a handler
    named after the job id in the mapreduce_jobs module.

    Returns:
        Whatever control.start_map() returns.
    """
    assert job_id in MAPREDUCE_JOBS, 'Unknown mapreduce job id %s' % job_id
    job_def = {
        'shard_count': 256,
        'queue_name': MAPREDUCE_TASK_QUEUE,
        'reader_spec': 'mapreduce.input_readers.DatastoreInputReader',
        'handler_spec': 'mapreduce_jobs.' + job_id,
    }
    # Whatever the job definition sets explicitly wins over the defaults.
    job_def.update(MAPREDUCE_JOBS[job_id])
    return control.start_map(base_path='/internal/mapreduce', **job_def)


### Actual mappers

# Entities created before this moment are treated as "old" by backfill_tags().
# NOTE: this is evaluated once, when the module is imported, so the cutoff is
# 12 hours before import time rather than 12 hours before each mapper call.
OLD_TASKS_CUTOFF = utils.utcnow() - datetime.timedelta(hours=12)


def backfill_tags(entity):
    # Mapper callback: skips entities that already have tags or whose
    # TaskRequest has none to offer.
    # NOTE(review): presumably copies the request's tags onto the entity, but
    # that part of the body is not visible in this excerpt -- confirm.

    # Already handled?
    if entity.tags:
        return

    # TaskRequest is immutable, can be fetched outside the transaction.
    # Both the in-context cache and memcache are bypassed for this read.
    task_request = entity.request_key.get(use_cache=False, use_memcache=False)
    if not task_request or not task_request.tags:
        return

    # Fast path for old entries: do not use transaction, assumes old entities are
    # not being concurrently modified outside of this job.
    # NOTE(review): the body of this branch is missing from this excerpt.
    if entity.created_ts and entity.created_ts < OLD_TASKS_CUTOFF:
def get_oauth_token_grant(service_account, validity_duration):
    """Returns "OAuth token grant" that allows usage of the service account.

    OAuth token grant is a signed assertion that basically says "the token
    server approves the usage of <service_account> by the <end-user>, and this
    assertion is valid for <validity_duration>".

    This function is called when the task is posted, while the end-user is
    still present. The grant is either generated by contacting the token server
    or fetched from the cache (if the cached one lives long enough).

    This function must not be used if 'has_token_server()' returns False. It
    will raise assertion error.

    The grant is later passed back to the token server to generate an actual
    OAuth access token. When this happens, the token server rechecks the ACLs,
    so it's fine to have large 'validity_duration' here. It basically defines
    for how long to cache "positive" ACL check.

    Args:
      service_account: a service account email to use.
      validity_duration: timedelta with how long the returned grant should live.

    Returns:
      Base64-encoded string with the grant body.

    Raises:
      PermissionError if the token server forbids the usage.
      MisconfigurationError if the service account is misconfigured.
      InternalError if the RPC fails unexpectedly, or if the freshly minted
      grant expires sooner than now + validity_duration.
    """
    assert has_token_server()
    assert service_accounts_utils.is_service_account(
        service_account), service_account

    end_user = auth.get_current_identity()

    existing_grant = None
    existing_exp_ts = None

    # Try to find a cached token first.
    cache_key = _oauth_token_grant_cache_key(service_account, end_user)
    cached = memcache.get(cache_key, namespace=_OAUTH_TOKEN_GRANT_CACHE_NS)
    if cached:
        try:
            existing_grant = cached['oauth_token_grant']
            existing_exp_ts = utils.timestamp_to_datetime(cached['exp_ts'])
            if not isinstance(existing_grant, str):
                raise TypeError('"oauth_token_grant" should be str')
        except (KeyError, ValueError, TypeError):
            # Treat malformed data as a cache miss. This should not happen
            # generally. The cached value is deliberately not logged since it
            # may hold a grant; logging.exception() already records the
            # traceback.
            logging.exception('Failed to parse oauth token grant cache entry')
            existing_grant = None
            existing_exp_ts = None

    # Randomly "expire" a cached token a bit prematurely to avoid a storm of
    # refresh requests when it expires for everyone for real. With a
    # randomization only few unlucky requests (most likely one) will hit the
    # token refresh procedure.
    now = utils.utcnow()
    if existing_exp_ts:
        rnd = datetime.timedelta(seconds=random.randint(0, 600))
        if now > existing_exp_ts - rnd:
            existing_grant = None
            existing_exp_ts = None

    # Does the cached token live long enough to be useful for the caller?
    if existing_exp_ts and existing_exp_ts > now + validity_duration:
        _log_token_grant('Using cached', existing_grant, existing_exp_ts)
        return existing_grant

    # Need to make a new token either because the cached one has expired or it
    # doesn't live long enough.
    #
    # We give the new token 1h of extra lifetime to make sure it can be reused
    # by next ~1h worth of tasks (assuming all tasks request exact same
    # lifetime). Without this trick each new task will attempt to generate new
    # token, seeing that the cached one expired just a few moments ago. With 1h
    # extra lifetime we effectively cache the token for 1h (minus 0-10 min due
    # to the expiration randomization above).
    #
    # Note: this call raises auth.AuthorizationError if the current caller is
    # not allowed to use the service account.
    new_grant, new_exp_ts = _mint_oauth_token_grant(
        service_account, end_user,
        validity_duration + datetime.timedelta(hours=1))

    # Verify the token server produces a token that lives long enough. The
    # expiry of new token must surely be above validity_duration, since we
    # request 1h of extra life.
    if new_exp_ts < now + validity_duration:
        _log_token_grant('Got unexpectedly short-lived',
                         new_grant,
                         new_exp_ts,
                         log_call=logging.error)
        raise InternalError(
            'Got unexpectedly short-lived grant, see server logs')

    # New token is good. The memcache entry is set to expire together with the
    # grant itself (the timestamp is divided by 1e6 before being passed as
    # 'time').
    memcache.set(key=cache_key,
                 value={
                     'oauth_token_grant': new_grant,
                     'exp_ts': utils.datetime_to_timestamp(new_exp_ts),
                 },
                 time=utils.datetime_to_timestamp(new_exp_ts) / 1e6,
                 namespace=_OAUTH_TOKEN_GRANT_CACHE_NS)

    _log_token_grant('Generated new', new_grant, new_exp_ts)
    return new_grant
Example #51
0
def _process_pull_task_batch(queue_name, dataset, table_name):
    """Leases up to 300 pull tasks and exports their builds to BigQuery.

    Each task payload is JSON carrying a build 'id'. The builds are fetched
    from the datastore and split three ways:
    - build entity not found: logged and dropped (its task is deleted);
    - build not ended yet: logged, task is left in the queue for a retry;
    - otherwise: handed to _export_builds(); builds it reports as not inserted
      are retried as well.

    NOTE(review): the previous docstring also described special handling of
    builds that are not finalized 20m+ after completion (BOT_DIED / TIMED_OUT /
    BOOTSTRAPPER_ERROR cases). None of that is implemented in this function;
    presumably it lives in _export_builds -- confirm.

    Returns:
      (done_count, total_count) tuple: the number of tasks deleted from the
      queue and the number of tasks leased.
    """
    started = utils.utcnow()

    # Lease tasks.
    lease_duration = datetime.timedelta(minutes=5)
    lease_deadline = started + lease_duration
    queue = taskqueue.Queue(queue_name)
    # https://cloud.google.com/bigquery/quotas#streaming_inserts
    # says "We recommend using about 500 rows per request".
    # We are using less because otherwise we tend to hit the 10 MB per request
    # limit.
    leased = queue.lease_tasks(lease_duration.total_seconds(), 300)
    if not leased:
        return 0, 0

    build_ids = [json.loads(task.payload)['id'] for task in leased]
    fetched = ndb.get_multi(
        ndb.Key(model.Build, build_id) for build_id in build_ids)

    # IDs of builds that could not be saved now and should be retried later.
    retry_ids = set()
    # model.Build objects ready to be inserted to BigQuery.
    exportable = []
    for build_id, build in zip(build_ids, fetched):
        if build and build.is_ended:
            exportable.append(build)
        elif build:
            logging.error('will retry build: not complete\n%d', build_id)
            retry_ids.add(build_id)
        else:
            logging.error('skipping build %d: not found', build_id)

    inserted_rows = 0
    if exportable:
        failed_ids = _export_builds(dataset, table_name, exportable,
                                    lease_deadline)
        inserted_rows = len(exportable) - len(failed_ids)
        retry_ids.update(failed_ids)

    if retry_ids:
        logging.warning('will retry builds %r later', sorted(retry_ids))

    # Only tasks whose build does not need a retry are removed from the queue.
    finished = [
        task for build_id, task in zip(build_ids, leased)
        if build_id not in retry_ids
    ]
    queue.delete_tasks(finished)
    logging.info('inserted %d rows, processed %d tasks', inserted_rows,
                 len(finished))
    return len(finished), len(leased)
Example #52
0
def update_replicas_task(auth_db_rev):
    """Packs AuthDB and pushes it to all out-of-date Replicas.

    Called via /internal/taskqueue/replication/<auth_db_rev> task (see
    backend/handlers.py) enqueued by 'trigger_replication'.

    Will check that AuthReplicationState.auth_db_rev is still equal to
    |auth_db_rev| before doing anything.

    Args:
      auth_db_rev: AuthDB revision this task was enqueued for.

    Returns:
      True if the task is done (it was stale, there was nothing to push, or no
      push ended with a retryable error), False if the task should be retried.
    """
    # Check that the task is not stale before doing any heavy lifting.
    replication_state = model.get_replication_state()
    if replication_state.auth_db_rev != auth_db_rev:
        logging.info(
            'Skipping stale task, current rev is %d, task was enqueued for rev %d)',
            replication_state.auth_db_rev, auth_db_rev)
        return True

    # Pack an entire AuthDB into a blob to be stored in the datastore and
    # pushed to Replicas.
    replication_state, auth_db_blob = pack_auth_db()

    # Put the blob into datastore. Also updates pointer to the latest stored blob.
    store_auth_db_snapshot(replication_state, auth_db_blob)

    # Notify PubSub subscribers that new snapshot is available.
    pubsub.publish_authdb_change(replication_state)

    # Grab last known replicas state and push only to replicas that are behind.
    stale_replicas = [
        entity
        for entity in AuthReplicaState.query(ancestor=replicas_root_key())
        if entity.auth_db_rev is None or entity.auth_db_rev < auth_db_rev
    ]
    if not stale_replicas:
        logging.info('All replicas are up-to-date.')
        return True

    # Sign the blob, replicas check the signature.
    key_name, sig = sign_auth_db_blob(auth_db_blob)

    # Push the blob to all out-of-date replicas, in parallel.
    push_started_ts = utils.utcnow()
    futures = {
        push_to_replica(replica.replica_url, auth_db_blob, key_name, sig):
        replica
        for replica in stale_replicas
    }

    # Wait for all attempts to complete. 'retry' collects replicas whose push
    # (or state update) failed in a retryable way.
    retry = []
    while futures:
        completed = ndb.Future.wait_any(futures)
        replica = futures.pop(completed)

        exception = completed.get_exception()
        success = exception is None

        current_revision = None
        auth_code_version = None
        if success:
            current_revision, auth_code_version = completed.get_result()

        if not success:
            logging.error(
                'Error when pushing update to replica: %s (%s).\nReplica id is %s.',
                exception.__class__.__name__, exception, replica.key.id())
            # Give up only on explicit fatal error, retry on any other exception.
            if not isinstance(exception, FatalReplicaUpdateError):
                retry.append(replica)

        # Eagerly update known replica state in local DB as soon as response is
        # received. That way if 'update_replicas_task' is killed midway, at least
        # the state of some replicas will be updated. Note that this transaction is
        # modifying a single entity group (replicas_root_key()) and thus can't be
        # called very often (due to 1 QPS limit on entity group updates).
        # If contention here becomes an issue, adding simple time.sleep(X) before
        # the transaction is totally fine (since 'update_replicas_task' is executed
        # on background task queue).
        try:
            if success:
                stored_rev = _update_state_on_success(
                    key=replica.key,
                    started_ts=push_started_ts,
                    finished_ts=utils.utcnow(),
                    current_revision=current_revision,
                    auth_code_version=auth_code_version)
                logging.info('Replica %s is updated to rev %d',
                             replica.key.id(), stored_rev)
            else:
                stored_rev = _update_state_on_fail(
                    key=replica.key,
                    started_ts=push_started_ts,
                    finished_ts=utils.utcnow(),
                    old_auth_db_rev=replica.auth_db_rev,
                    exc=exception)
                # If current push failed, but some other concurrent push (if any)
                # succeeded (and so replica is up-to-date), do not retry current push.
                if stored_rev is None or stored_rev > auth_db_rev:
                    if replica in retry:
                        retry.remove(replica)
        except (datastore_errors.InternalError, datastore_errors.Timeout,
                datastore_errors.TransactionFailedError) as exc:
            logging.exception(
                'Datastore error when updating replica state: %s.\n'
                'Replica id is %s.', exc.__class__.__name__, replica.key.id())
            # Should retry the task because of this. 'retry' is a list, so it
            # has append() but no add(); the replica may already have been
            # queued by the failed-push branch above, so avoid a duplicate.
            if replica not in retry:
                retry.append(replica)

    # Retry the task if at least one replica reported a retryable error.
    return not retry
Example #53
0
    def get(self, task_id):
        """Renders the 'swarming/user_task.html' page for a single task.

        |task_id| is first parsed as a result summary key and, if that raises
        ValueError, as a run result key. Aborts with 404 when the id cannot be
        parsed or no entity is stored under the key, and with 403 when the
        caller is not a privileged user.

        All datastore reads (request, parent, children, neighbouring tasks on
        the same bot, bot info) are started asynchronously and only resolved
        once everything has been kicked off.
        """
        try:
            key = task_pack.unpack_result_summary_key(task_id)
            request_key = task_pack.result_summary_key_to_request_key(key)
        except ValueError:
            try:
                key = task_pack.unpack_run_result_key(task_id)
                summary_key = task_pack.run_result_key_to_result_summary_key(
                    key)
                request_key = task_pack.result_summary_key_to_request_key(
                    summary_key)
            except (NotImplementedError, ValueError):
                self.abort(404, 'Invalid key format.')

        # 'result' can be either a TaskRunResult or TaskResultSummary.
        result_future = key.get_async()
        request_future = request_key.get_async()
        result = result_future.get_result()
        if not result:
            self.abort(404, 'Invalid key.')

        if not acl.is_privileged_user():
            self.abort(403, 'Implement access control based on the user')

        request = request_future.get_result()

        # Kick off the parent and children lookups.
        parent_task_future = None
        if request.parent_task_id:
            parent_task_future = task_pack.unpack_run_result_key(
                request.parent_task_id).get_async()
        children_tasks_futures = []
        for child_id in result.children_task_ids:
            child_key = task_pack.unpack_result_summary_key(child_id)
            children_tasks_futures.append(child_key.get_async())

        # Neighbouring tasks on the same bot only make sense once the task
        # has started. Note that the links will be to the TaskRunResult, not
        # to TaskResultSummary.
        bot_id = result.bot_id
        following_task_future = None
        previous_task_future = None
        if result.started_ts:
            run_result = task_result.TaskRunResult
            later_runs = run_result.query(
                run_result.bot_id == bot_id,
                run_result.started_ts > result.started_ts,
            )
            following_task_future = later_runs.order(
                run_result.started_ts).get_async()
            earlier_runs = run_result.query(
                run_result.bot_id == bot_id,
                run_result.started_ts < result.started_ts,
            )
            previous_task_future = earlier_runs.order(
                -run_result.started_ts).get_async()

        bot_future = None
        if bot_id:
            bot_future = bot_management.get_info_key(bot_id).get_async()

        # Everything is in flight; now collect the results.
        following_task = (following_task_future.get_result()
                          if following_task_future else None)
        previous_task = (previous_task_future.get_result()
                         if previous_task_future else None)
        parent_task = (parent_task_future.get_result()
                       if parent_task_future else None)
        children_tasks = [
            future.get_result() for future in children_tasks_futures
        ]
        bot = bot_future.get_result() if bot_future else None

        params = {
            'bot': bot,
            'children_tasks': children_tasks,
            'is_admin': acl.is_admin(),
            'is_gae_admin': users.is_current_user_admin(),
            'is_privileged_user': acl.is_privileged_user(),
            'following_task': following_task,
            'full_appid': os.environ['APPLICATION_ID'],
            'host_url': self.request.host_url,
            'is_running': result.state == task_result.State.RUNNING,
            'now': utils.utcnow(),
            'parent_task': parent_task,
            'previous_task': previous_task,
            'request': request,
            'task': result,
            'xsrf_token': self.generate_xsrf_token(),
        }
        page = template.render('swarming/user_task.html', params)
        self.response.write(page)
Example #54
0
    def test_non_empty(self):
        """Checks replication.new_auth_db_snapshot() on a populated datastore.

        Stores one entity of each kind (replication state, global config, two
        groups, a global and a local secret, an IP whitelist and its
        assignments) with the clock mocked to 2014-01-01 01:01:01, then
        compares the captured replication state and the snapshot (converted
        via snapshot_to_dict) against fully spelled out expected dicts.

        NOTE(review): the identity literals ('user:[email protected]' in the
        fixtures, '*****@*****.**' in the expectations) look like e-mail
        addresses that were redacted by a scrubber; presumably the fixtures
        and the expectations originally carried matching addresses -- confirm
        and restore before relying on this test.
        """
        # Freeze the clock so every utils.utcnow() below yields the same value.
        self.mock_now(datetime.datetime(2014, 1, 1, 1, 1, 1))

        state = model.AuthReplicationState(key=model.replication_state_key(),
                                           primary_id='blah',
                                           primary_url='https://blah',
                                           auth_db_rev=123)
        state.put()

        global_config = model.AuthGlobalConfig(
            key=model.root_key(),
            modified_ts=utils.utcnow(),
            modified_by=model.Identity.from_bytes('user:[email protected]'),
            oauth_client_id='oauth_client_id',
            oauth_client_secret='oauth_client_secret',
            oauth_additional_client_ids=['a', 'b'])
        global_config.put()

        group = model.AuthGroup(
            key=model.group_key('Some group'),
            members=[model.Identity.from_bytes('user:[email protected]')],
            globs=[model.IdentityGlob.from_bytes('user:*@example.com')],
            nested=[],
            description='Some description',
            owners='owning-group',
            created_ts=utils.utcnow(),
            created_by=model.Identity.from_bytes('user:[email protected]'),
            modified_ts=utils.utcnow(),
            modified_by=model.Identity.from_bytes('user:[email protected]'))
        group.put()

        # Second group only sets 'nested'; every other field is left unset.
        another = model.AuthGroup(key=model.group_key('Another group'),
                                  nested=['Some group'])
        another.put()

        global_secret = model.AuthSecret(
            id='global_secret',
            parent=model.secret_scope_key('global'),
            values=['1234', '5678'],
            modified_ts=utils.utcnow(),
            modified_by=model.Identity.from_bytes('user:[email protected]'))
        global_secret.put()

        # Local secret should not appear in a snapshot.
        local_secret = model.AuthSecret(
            id='local_secret',
            parent=model.secret_scope_key('local'),
            values=['1234', '5678'],
            modified_ts=utils.utcnow(),
            modified_by=model.Identity.from_bytes('user:[email protected]'))
        local_secret.put()

        ip_whitelist = model.AuthIPWhitelist(
            key=model.ip_whitelist_key('bots'),
            subnets=['127.0.0.1/32'],
            description='Some description',
            created_ts=utils.utcnow(),
            created_by=model.Identity.from_bytes('user:[email protected]'),
            modified_ts=utils.utcnow(),
            modified_by=model.Identity.from_bytes('user:[email protected]'))
        ip_whitelist.put()

        ip_whitelist_assignments = model.AuthIPWhitelistAssignments(
            key=model.ip_whitelist_assignments_key(),
            modified_ts=utils.utcnow(),
            modified_by=model.Identity.from_bytes('user:[email protected]'),
            assignments=[
                model.AuthIPWhitelistAssignments.Assignment(
                    identity=model.Identity.from_bytes(
                        'user:[email protected]'),
                    ip_whitelist='bots',
                    comment='some comment',
                    created_ts=utils.utcnow(),
                    created_by=model.Identity.from_bytes(
                        'user:[email protected]')),
            ])
        ip_whitelist_assignments.put()

        # Code under test.
        captured_state, snapshot = replication.new_auth_db_snapshot()

        expected_state = {
            'auth_db_rev': 123,
            'modified_ts': datetime.datetime(2014, 1, 1, 1, 1, 1),
            'primary_id': u'blah',
            'primary_url': u'https://blah',
        }
        self.assertEqual(expected_state, captured_state.to_dict())

        # Only 'global_secret' is listed under 'secrets'; 'local_secret' is
        # expected to be absent.
        expected_snapshot = {
            'global_config': {
                '__id__':
                'root',
                '__parent__':
                None,
                'auth_db_rev':
                None,
                'auth_db_prev_rev':
                None,
                'modified_by':
                model.Identity(kind='user', name='*****@*****.**'),
                'modified_ts':
                datetime.datetime(2014, 1, 1, 1, 1, 1),
                'oauth_additional_client_ids': [u'a', u'b'],
                'oauth_client_id':
                u'oauth_client_id',
                'oauth_client_secret':
                u'oauth_client_secret',
            },
            'groups': [
                {
                    '__id__': 'Another group',
                    '__parent__': ndb.Key('AuthGlobalConfig', 'root'),
                    'auth_db_rev': None,
                    'auth_db_prev_rev': None,
                    'created_by': None,
                    'created_ts': None,
                    'description': u'',
                    'globs': [],
                    'members': [],
                    'modified_by': None,
                    'modified_ts': None,
                    'nested': [u'Some group'],
                    'owners': u'administrators',
                },
                {
                    '__id__':
                    'Some group',
                    '__parent__':
                    ndb.Key('AuthGlobalConfig', 'root'),
                    'auth_db_rev':
                    None,
                    'auth_db_prev_rev':
                    None,
                    'created_by':
                    model.Identity(kind='user', name='*****@*****.**'),
                    'created_ts':
                    datetime.datetime(2014, 1, 1, 1, 1, 1),
                    'description':
                    u'Some description',
                    'globs':
                    [model.IdentityGlob(kind='user', pattern='*@example.com')],
                    'members':
                    [model.Identity(kind='user', name='*****@*****.**')],
                    'modified_by':
                    model.Identity(kind='user', name='*****@*****.**'),
                    'modified_ts':
                    datetime.datetime(2014, 1, 1, 1, 1, 1),
                    'nested': [],
                    'owners':
                    u'owning-group',
                },
            ],
            'secrets': [
                {
                    '__id__':
                    'global_secret',
                    '__parent__':
                    ndb.Key('AuthGlobalConfig', 'root', 'AuthSecretScope',
                            'global'),
                    'modified_by':
                    model.Identity(kind='user', name='*****@*****.**'),
                    'modified_ts':
                    datetime.datetime(2014, 1, 1, 1, 1, 1),
                    'values': ['1234', '5678'],
                },
            ],
            'ip_whitelists': [
                {
                    '__id__':
                    'bots',
                    '__parent__':
                    ndb.Key('AuthGlobalConfig', 'root'),
                    'auth_db_rev':
                    None,
                    'auth_db_prev_rev':
                    None,
                    'created_by':
                    model.Identity(kind='user', name='*****@*****.**'),
                    'created_ts':
                    datetime.datetime(2014, 1, 1, 1, 1, 1),
                    'description':
                    u'Some description',
                    'modified_by':
                    model.Identity(kind='user', name='*****@*****.**'),
                    'modified_ts':
                    datetime.datetime(2014, 1, 1, 1, 1, 1),
                    'subnets': [u'127.0.0.1/32'],
                },
            ],
            'ip_whitelist_assignments': {
                '__id__':
                'default',
                '__parent__':
                ndb.Key('AuthGlobalConfig', 'root'),
                'assignments': [
                    {
                        'comment':
                        u'some comment',
                        'created_by':
                        model.Identity(kind='user',
                                       name='*****@*****.**'),
                        'created_ts':
                        datetime.datetime(2014, 1, 1, 1, 1, 1),
                        'identity':
                        model.Identity(kind='user',
                                       name='*****@*****.**'),
                        'ip_whitelist':
                        u'bots',
                    },
                ],
                'auth_db_rev':
                None,
                'auth_db_prev_rev':
                None,
                'modified_by':
                model.Identity(kind='user', name='*****@*****.**'),
                'modified_ts':
                datetime.datetime(2014, 1, 1, 1, 1, 1),
            },
        }
        self.assertEqual(expected_snapshot, snapshot_to_dict(snapshot))
Example #55
0
    def get(self):
        """Renders the 'swarming/user_tasks.html' task list with statistics.

        Handles both ndb.Query searches and search.Index().search() queries.
        Whether |task_name| is set affects the meaning of |cursor|: when set,
        the cursor is for search.Index, otherwise the cursor is for a
        ndb.Query.

        Reads the 'cursor', 'limit', 'sort', 'state', 'task_name' and
        'task_tag' request parameters. Aborts with HTTP 400 when 'limit' is
        not an integer, when 'sort' or 'state' is not one of the known
        choices, or when the query raises search.QueryError or ValueError.
        """
        cursor_str = self.request.get('cursor')
        # A malformed 'limit' is a client error, same as a bad sort/state.
        try:
            limit = int(self.request.get('limit', 100))
        except ValueError:
            self.abort(400, 'Invalid limit')
        sort = self.request.get('sort', self.SORT_CHOICES[0][0])
        state = self.request.get('state', self.STATE_CHOICES[0][0][0])
        task_name = self.request.get('task_name', '').strip()
        task_tags = [
            line for line in self.request.get('task_tag', '').splitlines()
            if line
        ]

        if not any(sort == i[0] for i in self.SORT_CHOICES):
            self.abort(400, 'Invalid sort')
        if not any(any(state == i[0] for i in j) for j in self.STATE_CHOICES):
            self.abort(400, 'Invalid state')

        if sort != 'created_ts':
            # Zap all filters in this case to reduce the number of required indexes.
            # Revisit according to the user requests.
            state = 'all'

        now = utils.utcnow()
        counts_future = self._get_counts_future(now)

        # This call is synchronous.
        try:
            tasks, cursor_str, sort, state = task_result.get_tasks(
                task_name, task_tags, cursor_str, limit, sort, state)

            # Prefetch the TaskRequest all at once, so that ndb's in-process cache has
            # it instead of fetching them one at a time indirectly when using
            # TaskResultSummary.request_key.get().
            futures = ndb.get_multi_async(t.request_key for t in tasks)

            # Evaluate the counts to print the filtering columns with the associated
            # numbers.
            state_choices = self._get_state_choices(counts_future)
        except (search.QueryError, ValueError) as e:
            self.abort(400, str(e))

        def safe_sum(items):
            # sum() needs a timedelta start value to add timedeltas.
            return sum(items, datetime.timedelta())

        def avg(items):
            if not items:
                return 0.
            return safe_sum(items) / len(items)

        def median(items):
            # |items| must already be sorted.
            if not items:
                return 0.
            # Floor division keeps the index an int even under true division.
            middle = len(items) // 2
            if len(items) % 2:
                return items[middle]
            return (items[middle - 1] + items[middle]) / 2

        gen = (t.duration_now(now) for t in tasks)
        durations = sorted(t for t in gen if t is not None)
        gen = (t.pending_now(now) for t in tasks)
        pendings = sorted(t for t in gen if t is not None)
        total_cost_usd = sum(t.cost_usd for t in tasks)
        total_cost_saved_usd = sum(t.cost_saved_usd for t in tasks
                                   if t.cost_saved_usd)
        total_saved = safe_sum(t.duration for t in tasks if t.deduped_from)
        duration_sum = safe_sum(durations)
        # Guard against dividing by a zero total duration.
        total_saved_percent = ((100. * total_saved.total_seconds() /
                                duration_sum.total_seconds())
                               if duration_sum else 0.)
        params = {
            'cursor': cursor_str,
            'duration_average': avg(durations),
            'duration_median': median(durations),
            'duration_sum': duration_sum,
            'has_pending': any(t.is_pending for t in tasks),
            'has_running': any(t.is_running for t in tasks),
            'is_admin': acl.is_admin(),
            'is_privileged_user': acl.is_privileged_user(),
            'limit': limit,
            'now': now,
            'pending_average': avg(pendings),
            'pending_median': median(pendings),
            'pending_sum': safe_sum(pendings),
            'show_footer': bool(pendings or durations),
            'sort': sort,
            'sort_choices': self.SORT_CHOICES,
            'state': state,
            'state_choices': state_choices,
            'task_name': task_name,
            'task_tag': '\n'.join(task_tags),
            'tasks': tasks,
            'total_cost_usd': total_cost_usd,
            'total_cost_saved_usd': total_cost_saved_usd,
            'total_saved': total_saved,
            'total_saved_percent': total_saved_percent,
            'xsrf_token': self.generate_xsrf_token(),
        }
        # TODO(maruel): If admin or if the user is task's .user, show the Cancel
        # button. Do not show otherwise.
        self.response.write(template.render('swarming/user_tasks.html',
                                            params))

        # Do not let dangling futures linger around.
        ndb.Future.wait_all(futures)
Example #56
0
def _get_last_good_async(config_set, path, dest_type):
  """Fetches the last good (revision, config) pair for a config file.

  Written in ndb tasklet style: it yields futures and delivers its result by
  raising ndb.Return.

  As a side effect, the LastGoodConfig entity keyed '<config_set>:<path>' is
  created or refreshed in a transaction when it is missing, when its
  last_access_ts is unset or older than UPDATE_LAST_ACCESS_TIME_FREQUENCY, or
  when its recorded proto message name differs from the one derived from
  dest_type.

  Args:
    config_set: first half of the entity id.
    path: second half of the entity id.
    dest_type: type to convert the config to. If it is a protobuf Message
      subclass, the stored binary form is preferred over the text form.

  Returns (via ndb.Return):
    (revision, config), or (None, None) if no revision is stored yet.
  """
  now = utils.utcnow()
  last_good_id = '%s:%s' % (config_set, path)

  # Only protobuf destinations have a message name; drop it again if the
  # default symbol database does not know the message.
  proto_message_name = None
  wants_proto = dest_type and issubclass(dest_type, protobuf.message.Message)
  if wants_proto:
    proto_message_name = dest_type.DESCRIPTOR.full_name
    try:
      protobuf.symbol_database.Default().GetSymbol(proto_message_name)
    except KeyError:  # pragma: no cover
      logging.exception(
          'Recompile %s proto message with the latest protoc',
          proto_message_name)
      proto_message_name = None

  last_good = yield LastGoodConfig.get_by_id_async(last_good_id)

  # Refresh when the entity is absent, has not been touched recently, or was
  # stored for a different message type.
  needs_refresh = (
      not last_good or
      not last_good.last_access_ts or
      now - last_good.last_access_ts > UPDATE_LAST_ACCESS_TIME_FREQUENCY or
      last_good.proto_message_name != proto_message_name)
  if needs_refresh:
    # pylint does not like this usage of transactional_tasklet
    # pylint: disable=no-value-for-parameter
    @ndb.transactional_tasklet
    def refresh():
      # Re-read inside the transaction; the copy fetched above may be stale.
      entity = yield LastGoodConfig.get_by_id_async(last_good_id)
      if not entity:
        entity = LastGoodConfig(id=last_good_id)
      entity.last_access_ts = now
      if entity.proto_message_name != proto_message_name:
        # The binary form belongs to the old message type; discard it.
        entity.content_binary = None
        entity.proto_message_name = proto_message_name
      yield entity.put_async()
    yield refresh()

  # Note: last_good is still the copy read before the refresh.
  if not (last_good and last_good.revision):
    # The config wasn't loaded yet.
    raise ndb.Return(None, None)

  type_changed = last_good.proto_message_name != proto_message_name
  if type_changed:
    logging.error(
        ('Config message type for %s:%s differs in the datastore (%s) and in '
         'the code (%s). We have updated the cron job to parse configs using '
         'new message type, so this error should disappear soon. '
         'If it persists, check logs of the cron job that updates the configs.'
        ),
        config_set, path, last_good.proto_message_name,
        proto_message_name)

  cfg = None
  if proto_message_name:
    # When the stored type differs, failing to parse text is safer than
    # successfully parsing a binary blob of an unrelated message type.
    if last_good.content_binary and not type_changed:
      cfg = dest_type()
      cfg.MergeFromString(last_good.content_binary)
    else:
      logging.warning('loading a proto config from text, not binary')
  if not cfg:
    cfg = common._convert_config(last_good.content, dest_type)
  raise ndb.Return(last_good.revision, cfg)
Example #57
0
  def _update_last_good_config_async(self, config_key):
    """Refreshes the stored last good config identified by config_key.

    Written in ndb tasklet style (yields futures, returns nothing).

    Outline:
      1. Delete the entity if its last_access_ts is older than
         CONFIG_MAX_TIME_SINCE_LAST_ACCESS.
      2. Resolve the latest (revision, content_hash) for the config, bypassing
         memcache.
      3. If the content hash changed, fetch the new content and validate it;
         invalid or unavailable content leaves the entity untouched.
      4. In a transaction, store the new revision/content and regenerate the
         binary form when the entity has a proto_message_name.

    Args:
      config_key: ndb key of the entity; its id is '<config_set>:<path>'.
    """
    now = utils.utcnow()
    current = yield config_key.get_async()
    if current is None:
      # The entity was deleted after the key was obtained; nothing to refresh.
      return
    earliest_access_ts = now - CONFIG_MAX_TIME_SINCE_LAST_ACCESS
    if current.last_access_ts < earliest_access_ts:
      # Last access time was too long ago.
      yield current.key.delete_async()
      return

    config_set, path = config_key.id().split(':', 1)
    revision, content_hash = yield self.get_config_hash_async(
        config_set, path, use_memcache=False)
    if not revision:
      logging.warning(
          'Could not fetch hash of latest %s', config_key.id())
      return

    # A proto config without its binary form must be regenerated even when the
    # revision did not change.
    binary_missing = (
      current.proto_message_name and not current.content_binary)
    if current.revision == revision and not binary_missing:
      assert current.content_hash == content_hash
      return

    content = None
    if current.content_hash != content_hash:
      content = yield self.get_config_by_hash_async(content_hash)
      if content is None:
        logging.warning(
            'Could not fetch config content %s by hash %s',
            config_key.id(), content_hash)
        return
      logging.debug('Validating %s:%s@%s', config_set, path, revision)
      ctx = validation.Context.logging()
      validation.validate(config_set, path, content, ctx=ctx)
      if ctx.result().has_errors:
        # No exception is being handled here, so logging.exception() would
        # attach a meaningless "None" traceback; log a plain error instead.
        logging.error(
            'Invalid config %s:%s@%s is ignored', config_set, path, revision)
        return

    # content may be None if we think that it matches what we have locally.

    @ndb.transactional_tasklet
    def update():
      config = yield config_key.get_async()
      if config is None:
        # Deleted concurrently; there is nothing left to update.
        return
      config.revision = revision
      if config.content_hash != content_hash:
        if content is None:
          # Content hash matched before we started the transaction.
          # Config was updated between content_hash was resolved and
          # the transaction has started. Do nothing, next cron run will
          # get a new hash.
          return
        config.content_hash = content_hash
        config.content = content
        config.content_binary = None  # Invalidate to refresh below.

      if config.proto_message_name and not config.content_binary:
        try:
          config.content_binary = _content_to_binary(
              config.proto_message_name, config.content)
        except common.ConfigFormatError:
          logging.exception(
              'Invalid config %s:%s@%s is ignored', config_set, path,
              revision)
          return

      yield config.put_async()
      logging.info(
          'Updated last good config %s to %s',
          config_key.id(), revision)
    yield update()
Example #58
0
    def test_integration(self):
        # Walks one task through PENDING -> RUNNING -> COMPLETED: a TaskRequest
        # is created with its TaskResultSummary and TaskToRun, a bot reaps it
        # and then completes it. After each step the stored TaskResultSummary
        # must reflect the new state.
        request = task_request.make_request(_gen_request(), True)
        result_summary = task_result.new_result_summary(request)
        to_run = task_to_run.new_task_to_run(request)
        result_summary.modified_ts = utils.utcnow()
        ndb.transaction(lambda: ndb.put_multi([result_summary, to_run]))

        # Snapshot of a freshly created, still pending task.
        pending_view = {
            'abandoned_ts': None,
            'bot_dimensions': None,
            'bot_id': None,
            'bot_version': None,
            'children_task_ids': [],
            'completed_ts': None,
            'costs_usd': [],
            'cost_saved_usd': None,
            'created_ts': self.now,
            'deduped_from': None,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': self.now,
            'name': u'Request name',
            'outputs_ref': None,
            'properties_hash': None,
            'server_versions': [],
            'started_ts': None,
            'state': task_result.State.PENDING,
            'tags': [u'priority:50', u'tag:1', u'user:Jesus'],
            'try_number': None,
            'user': u'Jesus',
        }
        self.assertEqual(pending_view, result_summary.to_dict())

        # Nothing changed 2 secs later except latency.
        self.mock_now(self.now, 2)
        self.assertEqual(pending_view, result_summary.to_dict())

        # Task is reaped after 2 seconds (4 secs total).
        reap_ts = self.now + datetime.timedelta(seconds=4)
        self.mock_now(reap_ts)
        to_run.queue_number = None
        to_run.put()
        run_result = task_result.new_run_result(
            request, 1, 'localhost', 'abc', {})
        run_result.modified_ts = utils.utcnow()
        result_summary.set_from_run_result(run_result, request)
        ndb.transaction(lambda: ndb.put_multi((result_summary, run_result)))

        # Only the fields touched by the reaping differ from the pending view.
        running_view = dict(pending_view)
        running_view.update({
            'bot_dimensions': {},
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'costs_usd': [0.],
            'modified_ts': reap_ts,
            'server_versions': [u'v1a'],
            'started_ts': reap_ts,
            'state': task_result.State.RUNNING,
            'try_number': 1,
        })
        self.assertEqual(running_view, result_summary.key.get().to_dict())

        # Task completed after 2 seconds (6 secs total), the task has been
        # running for 2 seconds.
        complete_ts = self.now + datetime.timedelta(seconds=6)
        self.mock_now(complete_ts)
        run_result.completed_ts = complete_ts
        run_result.exit_codes.append(0)
        run_result.state = task_result.State.COMPLETED
        run_result.modified_ts = utils.utcnow()
        ndb.transaction(
            lambda: ndb.put_multi(run_result.append_output(0, 'foo', 0)))
        result_summary.set_from_run_result(run_result, request)
        ndb.transaction(lambda: ndb.put_multi((result_summary, run_result)))

        # Completion only changes these fields relative to the running view.
        completed_view = dict(running_view)
        completed_view.update({
            'completed_ts': complete_ts,
            'exit_codes': [0],
            'modified_ts': complete_ts,
            'state': task_result.State.COMPLETED,
        })
        self.assertEqual(completed_view, result_summary.key.get().to_dict())
        self.assertEqual(['foo'], list(result_summary.get_outputs()))

        # Ran for 2 seconds after waiting 4 seconds in the queue.
        ran_for = datetime.timedelta(seconds=2)
        waited_for = datetime.timedelta(seconds=4)
        self.assertEqual(ran_for, result_summary.duration_total)
        self.assertEqual(
            ran_for, result_summary.duration_now(utils.utcnow()))
        self.assertEqual(waited_for, result_summary.pending)
        self.assertEqual(
            waited_for, result_summary.pending_now(utils.utcnow()))

        # Packed ids and end timestamps are consistent on both entities.
        self.assertEqual(
            task_pack.pack_result_summary_key(result_summary.key),
            result_summary.task_id)
        self.assertEqual(complete_ts, result_summary.ended_ts)
        self.assertEqual(
            task_pack.pack_run_result_key(run_result.key),
            run_result.task_id)
        self.assertEqual(complete_ts, run_result.ended_ts)
Example #59
0
def yield_next_available_task_to_dispatch(bot_dimensions):
  """Generates candidate (TaskRequest, TaskToRun) pairs for a bot.

  Candidates come out in the order of the query below, i.e. sorted by
  TaskToRun.queue_number. Once the caller decides a task is suitable, it must
  call reap_task_to_run(task.key) so the task is not scheduled again.

  Performance is the top most priority here.

  A summary line with all the counters is logged when the generator finishes,
  whether it was exhausted, timed out or was closed early.

  Arguments:
  - bot_dimensions: dimensions (as a dict) defined by the bot that can be
      matched.
  """
  # Hash of every subset of the bot's dimensions; a task whose dimensions hash
  # is not in this set cannot run on this bot.
  acceptable_hashes = frozenset(
      _hash_dimensions(utils.encode_to_json(i))
      for i in _powerset(bot_dimensions))
  started_at = utils.utcnow()

  # Handlers get 60s to complete. Giving up the search after 40s leaves 20s to
  # reap the task and finish the HTTP request; blowing up right after a
  # successful reap would leave the task in limbo until a cron job retries it.
  search_budget_secs = 40.

  # Counters reported in the summary log line.
  broken = 0
  cache_lookup = 0
  expired = 0
  hash_mismatch = 0
  ignored = 0
  no_queue = 0
  real_mismatch = 0
  total = 0

  # Fetch as aggressively as possible. The default ndb.EVENTUAL_CONSISTENCY is
  # used, so stale items may come back; the loop copes with that explicitly.
  # Measured query times for 1275 items:
  # - 100/200: 2s~40s.
  # - 250/500: 2s~50s.
  # - 50/500: 3s~20s. Slower but with less variance; 20s~40s spikes are rarer.
  # What hurts:
  # - Outliers, some shards are simply slower at executing the query.
  # - Median time, which we should optimize.
  # - Abusing batching will slow down this query.
  #
  # TODO(maruel): Measure query performance with stats_framework!!
  # TODO(maruel): Use fetch_page_async() + ndb.get_multi_async() +
  # memcache.get_multi_async() to do pipelined processing. Should greatly reduce
  # the effect of latency on the total duration of this function. I also suspect
  # using ndb.get_multi() will return fresher objects than what is returned by
  # the query.
  opts = ndb.QueryOptions(batch_size=50, prefetch_size=500, keys_only=True)
  try:
    # The .queue_number > 0 filter is required, otherwise all the entities
    # with queue_number None are returned first.
    ordered = TaskToRun.query(default_options=opts).order(
        TaskToRun.queue_number)
    candidates = ordered.filter(TaskToRun.queue_number > 0)
    for to_run_key in candidates:
      if (utils.utcnow() - started_at).total_seconds() > search_budget_secs:
        return

      total += 1
      # Be defensive: make sure the key has the expected structure.
      try:
        validate_to_run_key(to_run_key)
      except ValueError as e:
        logging.error(str(e))
        broken += 1
        continue

      # integer_id() == dimensions_hash.
      if to_run_key.integer_id() not in acceptable_hashes:
        hash_mismatch += 1
        continue

      # Cheap memcache check first, before any datastore fetch.
      if _lookup_cache_is_taken(to_run_key):
        cache_lookup += 1
        continue

      # The key survived the cheap filters; load the actual entity.
      to_run = to_run_key.get(use_cache=False)

      # The fetch is slow, so the task may have been taken in the meantime.
      if _lookup_cache_is_taken(to_run_key):
        cache_lookup += 1
        continue

      # The index is not read in a transaction and may lag behind the entity.
      if not to_run.queue_number:
        no_queue += 1
        continue

      # Expired; a cron job will cancel it eventually. The comparison uses the
      # timestamp taken before the query on purpose, so that a slow query does
      # not cause exaggerated expirations; a task that expired during the
      # query may therefore still be handed out.
      if to_run.expiration_ts < started_at:
        expired += 1
        continue

      # Hashes may collide (probability 2**-31, low enough for our purpose),
      # so confirm against the real dimensions in the TaskRequest.
      # use_cache=False avoids bloating the in-process cache.
      request = to_run.request_key.get(use_cache=False)
      if not match_dimensions(request.properties.dimensions, bot_dimensions):
        real_mismatch += 1
        continue

      # A valid candidate. Another bot may have reaped it in the meantime.
      yield request, to_run
      # Reaching this line means the caller asked for another candidate.
      ignored += 1
  finally:
    elapsed = (utils.utcnow() - started_at).total_seconds()
    logging.info(
        '%d/%s in %5.2fs: %d total, %d exp %d no_queue, %d hash mismatch, '
        '%d cache negative, %d dimensions mismatch, %d ignored, %d broken',
        opts.batch_size,
        opts.prefetch_size,
        elapsed,
        total,
        expired,
        no_queue,
        hash_mismatch,
        cache_lookup,
        real_mismatch,
        ignored,
        broken)
Example #60
0
 def assignment(ident, ip_whitelist):
     """Builds an IP whitelist Assignment for `ident`, created at the current time."""
     fields = {
         'identity': model.Identity.from_bytes(ident),
         'ip_whitelist': ip_whitelist,
         'created_ts': utils.utcnow(),
         'comment': 'comment',
     }
     return model.AuthIPWhitelistAssignments.Assignment(**fields)