Example #1
 def _query_from_request(self, request, sort=None):
     """Returns a TaskResultSummary query."""
     start = message_conversion.epoch_to_datetime(request.start)
     end = message_conversion.epoch_to_datetime(request.end)
     return task_result.get_result_summaries_query(
         start, end, sort or request.sort.name.lower(),
         request.state.name.lower(), request.tags)
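Every call site on this page passes the same five positional arguments, which pins down the helper's shape. The stub below is only a sketch inferred from those calls, not the real implementation in task_result, and the parameter names are assumptions:

 def get_result_summaries_query(start, end, sort, state, tags):
     """Hypothetical stub inferred from the call sites; not the real code.

     start, end: datetime bounds (either may be None).
     sort: property to order by, e.g. 'created_ts'.
     state: state filter string, e.g. 'all' or 'pending_running'.
     tags: optional list of 'key:value' task tags; may be None or [].
     Returns an ndb.Query over TaskResultSummary entities.
     """
     raise NotImplementedError('illustrative stub only')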
Example #2
 def list(self, request):
     """Provides a list of available tasks."""
     logging.info('%s', request)
     try:
         start = message_conversion.epoch_to_datetime(request.start)
         end = message_conversion.epoch_to_datetime(request.end)
         now = utils.utcnow()
         query = task_result.get_result_summaries_query(
             start, end, request.sort.name.lower(),
             request.state.name.lower(), request.tags)
         items, cursor = datastore_utils.fetch_page(query, request.limit,
                                                    request.cursor)
     except ValueError as e:
         raise endpoints.BadRequestException(
             'Inappropriate filter for tasks/list: %s' % e)
     except datastore_errors.NeedIndexError as e:
         logging.error('%s', e)
         raise endpoints.BadRequestException(
             'Requires new index, ask admin to create one.')
     except datastore_errors.BadArgumentError as e:
         logging.error('%s', e)
         raise endpoints.BadRequestException(
             'This combination is unsupported, sorry.')
     return swarming_rpcs.TaskList(
         cursor=cursor,
         items=[message_conversion.task_result_to_rpc(i) for i in items],
         now=now)
Example #3
 def list(self, request):
   """Provides a list of available tasks."""
   logging.info('%s', request)
   try:
     start = message_conversion.epoch_to_datetime(request.start)
     end = message_conversion.epoch_to_datetime(request.end)
     now = utils.utcnow()
     query = task_result.get_result_summaries_query(
         start, end,
         request.sort.name.lower(),
         request.state.name.lower(),
         request.tags)
     items, cursor = datastore_utils.fetch_page(
         query, request.limit, request.cursor)
   except ValueError as e:
     raise endpoints.BadRequestException(
         'Inappropriate filter for tasks/list: %s' % e)
   except datastore_errors.NeedIndexError as e:
     logging.error('%s', e)
     raise endpoints.BadRequestException(
         'Requires new index, ask admin to create one.')
   except datastore_errors.BadArgumentError as e:
     logging.error('%s', e)
     raise endpoints.BadRequestException(
         'This combination is unsupported, sorry.')
   return swarming_rpcs.TaskList(
       cursor=cursor,
       items=[message_conversion.task_result_to_rpc(i) for i in items],
       now=now)
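Examples #2 and #3 rely on the (items, cursor) contract of datastore_utils.fetch_page. Assuming that contract, a caller could page through all results by feeding the returned cursor back in; process() here is a hypothetical stand-in:

 cursor = None
 while True:
     items, cursor = datastore_utils.fetch_page(query, 100, cursor)
     for item in items:
         process(item)  # hypothetical per-item handler
     if not items or not cursor:
         break  # last page reached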
Example #4
  def get(self):
    logging.error('Unexpected old client')
    extra = frozenset(self.request.GET) - self.EXPECTED
    if extra:
      self.abort_with_error(
          400,
          error='Extraneous query parameters. Did you make a typo? %s' %
          ','.join(sorted(extra)))

    interval = self.request.get('interval', 24 * 3600)
    state = self.request.get('state', 'all')
    tags = self.request.get_all('tag')

    try:
      interval = int(interval)
      if interval <= 0:
        raise ValueError()
    except ValueError:
      self.abort_with_error(
          400, error='"interval" must be a positive integer number of seconds')

    if state not in self.VALID_STATES:
      self.abort_with_error(
          400,
          error='Invalid state "%s", expecting one of %s' %
          (state, ', '.join(sorted(self.VALID_STATES))))

    cutoff = utils.utcnow() - datetime.timedelta(seconds=interval)
    query = task_result.get_result_summaries_query(
        cutoff, None, 'created_ts', state, tags)
    self.send_response(utils.to_json_encodable({'count': query.count()}))
Example #5
    def get(self):
        logging.error('Unexpected old client')
        extra = frozenset(self.request.GET) - self.EXPECTED
        if extra:
            self.abort_with_error(
                400,
                error='Extraneous query parameters. Did you make a typo? %s' %
                ','.join(sorted(extra)))

        interval = self.request.get('interval', 24 * 3600)
        state = self.request.get('state', 'all')
        tags = self.request.get_all('tag')

        try:
            interval = int(interval)
            if interval <= 0:
                raise ValueError()
        except ValueError:
            self.abort_with_error(
                400,
                error='"interval" must be a positive integer number of seconds'
            )

        if state not in self.VALID_STATES:
            self.abort_with_error(
                400,
                error='Invalid state "%s", expecting one of %s' %
                (state, ', '.join(sorted(self.VALID_STATES))))

        cutoff = utils.utcnow() - datetime.timedelta(seconds=interval)
        query = task_result.get_result_summaries_query(cutoff, None,
                                                       'created_ts', state,
                                                       tags)
        self.send_response(utils.to_json_encodable({'count': query.count()}))
Example #6
 def _get_counts_future(self, now):
   """Returns all the counting futures in parallel."""
   counts_future = {}
   last_24h = now - datetime.timedelta(days=1)
   for state_key, _, _ in itertools.chain.from_iterable(self.STATE_CHOICES):
     query = task_result.get_result_summaries_query(
         last_24h, None, 'created_ts', state_key, None)
     counts_future[state_key] = query.count_async()
   return counts_future
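Since each query issues count_async(), the counting RPCs run concurrently; the caller blocks only when resolving the futures. A minimal sketch of consuming the returned dict, assuming standard ndb Future semantics:

 futures = self._get_counts_future(utils.utcnow())  # inside the same handler
 counts = dict(
     (state, f.get_result())  # get_result() blocks until the RPC resolves
     for state, f in futures.iteritems())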
Example #7
 def _get_counts_future(self, now):
   """Returns all the counting futures in parallel."""
   counts_future = {}
   last_24h = now - datetime.timedelta(days=1)
   for state_key, _, _ in itertools.chain.from_iterable(self.STATE_CHOICES):
     query = task_result.get_result_summaries_query(
         last_24h, None, 'created_ts', state_key, None)
     counts_future[state_key] = query.count_async()
   return counts_future
Example #8
 def _query_from_request(self, request, sort=None):
   """Returns a TaskResultSummary query."""
   start = message_conversion.epoch_to_datetime(request.start)
   end = message_conversion.epoch_to_datetime(request.end)
   return task_result.get_result_summaries_query(
       start, end,
       sort or request.sort.name.lower(),
       request.state.name.lower(),
       request.tags)
Example #9
def _set_jobs_metrics():
  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
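  # 'pending_running' appears to be a composite filter selecting tasks that
  # are either PENDING or RUNNING (inferred from the state_map above).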
  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter()
  while (yield query_iter.has_next_async()):
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = extract_job_fields(summary.tags, summary.bot_id or '')
    jobs_status.set(status, target_fields=TARGET_FIELDS, fields=fields)
Example #10
 def count(self, request):
     """Counts number of tasks in a given state."""
     logging.info('%s', request)
     if not request.start:
         raise endpoints.BadRequestException('start (as epoch) is required')
     if not request.end:
         raise endpoints.BadRequestException('end (as epoch) is required')
     try:
         now = utils.utcnow()
         query = task_result.get_result_summaries_query(
             message_conversion.epoch_to_datetime(request.start),
             message_conversion.epoch_to_datetime(request.end),
             'created_ts', request.state.name.lower(), request.tags)
         count = query.count()
     except ValueError as e:
         raise endpoints.BadRequestException(
             'Inappropriate filter for tasks/count: %s' % e)
     return swarming_rpcs.TasksCount(count=count, now=now)
Example #11
def _set_jobs_metrics(now):
  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter()
  jobs_counts = defaultdict(lambda: 0)
  jobs_pending_distributions = defaultdict(
      lambda: gae_ts_mon.Distribution(_bucketer))
  jobs_max_pending_durations = defaultdict(lambda: 0.0)
  while (yield query_iter.has_next_async()):
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = extract_job_fields(summary.tags)
    target_fields = dict(TARGET_FIELDS)
    if summary.bot_id:
      target_fields['hostname'] = 'autogen:' + summary.bot_id
    if summary.bot_id and status == 'running':
      jobs_running.set(True, target_fields=target_fields, fields=fields)
    fields['status'] = status

    key = tuple(sorted(fields.iteritems()))

    jobs_counts[key] += 1

    pending_duration = summary.pending_now(now)
    if pending_duration is not None:
      jobs_pending_distributions[key].add(pending_duration.total_seconds())
      jobs_max_pending_durations[key] = max(
          jobs_max_pending_durations[key],
          pending_duration.total_seconds())

  for key, count in jobs_counts.iteritems():
    jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key))

  for key, distribution in jobs_pending_distributions.iteritems():
    jobs_pending_durations.set(
        distribution, target_fields=TARGET_FIELDS, fields=dict(key))

  for key, val in jobs_max_pending_durations.iteritems():
    jobs_max_pending_duration.set(
        val, target_fields=TARGET_FIELDS, fields=dict(key))
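The tuple(sorted(fields.iteritems())) trick freezes a dict of metric fields into a hashable key for the defaultdicts, and dict(key) recovers the mapping when the aggregates are flushed. A tiny round trip with assumed sample values:

 fields = {'project': 'example', 'status': 'pending'}  # assumed sample values
 key = tuple(sorted(fields.iteritems()))
 # key == (('project', 'example'), ('status', 'pending')): hashable, so it
 # can index the defaultdicts above, and dict(key) restores the mapping.
 assert dict(key) == fields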
Example #12
 def count(self, request):
   """Counts number of tasks in a given state."""
   logging.info('%s', request)
   if not request.start:
     raise endpoints.BadRequestException('start (as epoch) is required')
   if not request.end:
     raise endpoints.BadRequestException('end (as epoch) is required')
   try:
     now = utils.utcnow()
     query = task_result.get_result_summaries_query(
         message_conversion.epoch_to_datetime(request.start),
         message_conversion.epoch_to_datetime(request.end),
         'created_ts',
         request.state.name.lower(),
         request.tags)
     count = query.count()
   except ValueError as e:
     raise endpoints.BadRequestException(
         'Inappropriate filter for tasks/count: %s' % e)
   return swarming_rpcs.TasksCount(count=count, now=now)
Example #13
def _set_jobs_metrics(payload):
  params = _ShardParams(payload)

  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  jobs_counts = defaultdict(lambda: 0)
  jobs_total = 0
  jobs_pending_distributions = defaultdict(
      lambda: gae_ts_mon.Distribution(_bucketer))
  jobs_max_pending_durations = defaultdict(lambda: 0.0)

  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter(
      produce_cursors=True, start_cursor=params.cursor)

  while query_iter.has_next():
    runtime = (utils.utcnow() - params.start_time).total_seconds()
    if jobs_total >= _JOBS_PER_SHARD or runtime > _REQUEST_TIMEOUT_SEC:
      params.cursor = query_iter.cursor_after()
      params.task_count += 1
      utils.enqueue_task(url='/internal/taskqueue/tsmon/jobs',
                         queue_name='tsmon',
                         payload=params.json())
      params.task_count -= 1  # For accurate logging below.
      break

    params.count += 1
    jobs_total += 1
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = _extract_job_fields(summary.tags)
    target_fields = dict(_TARGET_FIELDS)
    if summary.bot_id:
      target_fields['hostname'] = 'autogen:' + summary.bot_id
    if summary.bot_id and status == 'running':
      _jobs_running.set(True, target_fields=target_fields, fields=fields)
    fields['status'] = status

    key = tuple(sorted(fields.iteritems()))

    jobs_counts[key] += 1

    pending_duration = summary.pending_now(utils.utcnow())
    if pending_duration is not None:
      jobs_pending_distributions[key].add(pending_duration.total_seconds())
      jobs_max_pending_durations[key] = max(
          jobs_max_pending_durations[key],
          pending_duration.total_seconds())

  logging.debug(
      '_set_jobs_metrics: task %d started at %s, processed %d jobs (%d total)',
      params.task_count, params.task_start, jobs_total, params.count)

  # Global counts are sharded by task_num and aggregated in queries.
  target_fields = dict(_TARGET_FIELDS)
  target_fields['task_num'] = params.task_count

  for key, count in jobs_counts.iteritems():
    _jobs_active.set(count, target_fields=target_fields, fields=dict(key))

  for key, distribution in jobs_pending_distributions.iteritems():
    _jobs_pending_durations.set(
        distribution, target_fields=target_fields, fields=dict(key))

  for key, val in jobs_max_pending_durations.iteritems():
    _jobs_max_pending_duration.set(
        val, target_fields=target_fields, fields=dict(key))
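Example #13 bounds the work done per task-queue task and checkpoints the datastore cursor before re-enqueueing itself. Stripped of the metrics bookkeeping, the pattern reduces to roughly this sketch (BATCH_LIMIT, enqueue_continuation and handle are hypothetical placeholders; the ndb iterator calls are real):

 it = query.iter(produce_cursors=True, start_cursor=cursor)
 processed = 0
 while it.has_next():
     if processed >= BATCH_LIMIT:  # assumed per-shard budget
         enqueue_continuation(it.cursor_after())  # cursor past last item read
         break
     handle(it.next())  # hypothetical per-item work
     processed += 1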
Example #14
  def get(self):
    cursor_str = self.request.get('cursor')
    limit = int(self.request.get('limit', 100))
    sort = self.request.get('sort', self.SORT_CHOICES[0][0])
    state = self.request.get('state', self.STATE_CHOICES[0][0][0])
    counts = self.request.get('counts', '').strip()
    task_tags = [
      line for line in self.request.get('task_tag', '').splitlines() if line
    ]

    if not any(sort == i[0] for i in self.SORT_CHOICES):
      self.abort(400, 'Invalid sort')
    if not any(any(state == i[0] for i in j) for j in self.STATE_CHOICES):
      self.abort(400, 'Invalid state')

    if sort != 'created_ts':
      # Zap all filters in this case to reduce the number of required indexes.
      # Revisit according to the user requests.
      state = 'all'

    now = utils.utcnow()
    # "Temporarily" disable the count. This is too slow on the prod server
    # (>10s). The fix is to have the web page do a XHR query to get the values
    # asynchronously.
    counts_future = None
    if counts == 'true':
      counts_future = self._get_counts_future(now)

    try:
      if task_tags:
        # Enforce created_ts when tags are used.
        sort = 'created_ts'
      query = task_result.get_result_summaries_query(
          None, None, sort, state, task_tags)
      tasks, cursor_str = datastore_utils.fetch_page(query, limit, cursor_str)

      # Prefetch the TaskRequest all at once, so that ndb's in-process cache has
      # it instead of fetching them one at a time indirectly when using
      # TaskResultSummary.request_key.get().
      futures = ndb.get_multi_async(t.request_key for t in tasks)

      # Evaluate the counts to print the filtering columns with the associated
      # numbers.
      state_choices = self._get_state_choices(counts_future)
    except ValueError as e:
      self.abort(400, str(e))

    def safe_sum(items):
      return sum(items, datetime.timedelta())

    def avg(items):
      if not items:
        return 0.
      return safe_sum(items) / len(items)

    def median(items):
      if not items:
        return 0.
      middle = len(items) / 2
      if len(items) % 2:
        return items[middle]
      return (items[middle-1]+items[middle]) / 2

    gen = (t.duration_now(now) for t in tasks)
    durations = sorted(t for t in gen if t is not None)
    gen = (t.pending_now(now) for t in tasks)
    pendings = sorted(t for t in gen if t is not None)
    total_cost_usd = sum(t.cost_usd for t in tasks)
    total_cost_saved_usd = sum(
        t.cost_saved_usd for t in tasks if t.cost_saved_usd)
    # Include the overhead in the total amount of time saved, since it's
    # overhead saved.
    # In theory, t.duration_as_seen_by_server should always be set when
    # t.deduped_from is set, but there are some broken entities in the
    # datastore.
    total_saved = safe_sum(
        t.duration_as_seen_by_server for t in tasks
        if t.deduped_from and t.duration_as_seen_by_server)
    duration_sum = safe_sum(durations)
    total_saved_percent = (
        (100. * total_saved.total_seconds() / duration_sum.total_seconds())
        if duration_sum else 0.)

    try_link = '/tasklist?l=%d' % limit
    if task_tags:
      try_link += '&f=' + '&f='.join(task_tags)
    params = {
      'cursor': cursor_str,
      'duration_average': avg(durations),
      'duration_median': median(durations),
      'duration_sum': duration_sum,
      'has_pending': any(t.is_pending for t in tasks),
      'has_running': any(t.is_running for t in tasks),
      'is_admin': acl.is_admin(),
      'is_privileged_user': acl.is_privileged_user(),
      'limit': limit,
      'now': now,
      'pending_average': avg(pendings),
      'pending_median': median(pendings),
      'pending_sum': safe_sum(pendings),
      'show_footer': bool(pendings or durations),
      'sort': sort,
      'sort_choices': self.SORT_CHOICES,
      'state': state,
      'state_choices': state_choices,
      'task_tag': '\n'.join(task_tags),
      'tasks': tasks,
      'total_cost_usd': total_cost_usd,
      'total_cost_saved_usd': total_cost_saved_usd,
      'total_saved': total_saved,
      'total_saved_percent': total_saved_percent,
      'try_link': try_link,
      'xsrf_token': self.generate_xsrf_token(),
    }
    # TODO(maruel): If admin or if the user is task's .user, show the Cancel
    # button. Do not show otherwise.
    self.response.write(template.render('swarming/user_tasks.html', params))

    # Do not let dangling futures linger around.
    ndb.Future.wait_all(futures)
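A side note on safe_sum in Example #14: sum() starts from the integer 0 by default, and 0 + datetime.timedelta(...) raises a TypeError, so the helper seeds the sum with an empty timedelta. A self-contained illustration:

 import datetime
 items = [datetime.timedelta(seconds=2), datetime.timedelta(seconds=3)]
 # sum(items) raises TypeError (int + timedelta); seeding works:
 total = sum(items, datetime.timedelta())  # -> timedelta(seconds=5)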