def _query_from_request(self, request, sort=None):
  """Returns a TaskResultSummary query."""
  start = message_conversion.epoch_to_datetime(request.start)
  end = message_conversion.epoch_to_datetime(request.end)
  return task_result.get_result_summaries_query(
      start, end,
      sort or request.sort.name.lower(),
      request.state.name.lower(),
      request.tags)
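# Hedged sketch, assuming this helper lives on the same endpoint class: the
# list() handler below inlines the identical query construction, so with
# _query_from_request it could be reduced to roughly the following (the
# method name is hypothetical):
#
#   def _list_page(self, request):
#     query = self._query_from_request(request)
#     return datastore_utils.fetch_page(query, request.limit, request.cursor)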
def list(self, request):
  """Provides a list of available tasks."""
  logging.info('%s', request)
  try:
    start = message_conversion.epoch_to_datetime(request.start)
    end = message_conversion.epoch_to_datetime(request.end)
    now = utils.utcnow()
    query = task_result.get_result_summaries_query(
        start, end,
        request.sort.name.lower(),
        request.state.name.lower(),
        request.tags)
    items, cursor = datastore_utils.fetch_page(
        query, request.limit, request.cursor)
  except ValueError as e:
    raise endpoints.BadRequestException(
        'Inappropriate filter for tasks/list: %s' % e)
  except datastore_errors.NeedIndexError as e:
    logging.error('%s', e)
    raise endpoints.BadRequestException(
        'Requires new index, ask admin to create one.')
  except datastore_errors.BadArgumentError as e:
    logging.error('%s', e)
    raise endpoints.BadRequestException(
        'This combination is unsupported, sorry.')
  return swarming_rpcs.TaskList(
      cursor=cursor,
      items=[message_conversion.task_result_to_rpc(i) for i in items],
      now=now)
def get(self):
  logging.error('Unexpected old client')
  extra = frozenset(self.request.GET) - self.EXPECTED
  if extra:
    self.abort_with_error(
        400,
        error='Extraneous query parameters. Did you make a typo? %s' %
              ','.join(sorted(extra)))
  interval = self.request.get('interval', 24 * 3600)
  state = self.request.get('state', 'all')
  tags = self.request.get_all('tag')
  try:
    interval = int(interval)
    if interval <= 0:
      raise ValueError()
  except ValueError:
    self.abort_with_error(
        400, error='"interval" must be a positive integer number of seconds')
  if state not in self.VALID_STATES:
    self.abort_with_error(
        400,
        error='Invalid state "%s", expecting one of %s' %
              (state, ', '.join(sorted(self.VALID_STATES))))
  cutoff = utils.utcnow() - datetime.timedelta(seconds=interval)
  query = task_result.get_result_summaries_query(
      cutoff, None, 'created_ts', state, tags)
  self.send_response(utils.to_json_encodable({'count': query.count()}))
def _get_counts_future(self, now):
  """Starts all the counting queries in parallel and returns their futures."""
  counts_future = {}
  last_24h = now - datetime.timedelta(days=1)
  for state_key, _, _ in itertools.chain.from_iterable(self.STATE_CHOICES):
    query = task_result.get_result_summaries_query(
        last_24h, None, 'created_ts', state_key, None)
    counts_future[state_key] = query.count_async()
  return counts_future
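# Hedged sketch of resolving the futures from _get_counts_future(); the real
# consumer is _get_state_choices() (not shown here), but ndb count_async()
# futures yield their integer counts via get_result(), roughly:
#
#   counts_future = self._get_counts_future(utils.utcnow())
#   counts = dict(
#       (state_key, future.get_result())
#       for state_key, future in counts_future.iteritems())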
def _set_jobs_metrics():
  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter()
  while (yield query_iter.has_next_async()):
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = extract_job_fields(summary.tags, summary.bot_id or '')
    jobs_status.set(status, target_fields=TARGET_FIELDS, fields=fields)
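# Note: the 'yield' in the loop above makes _set_jobs_metrics a generator, so
# it only does work when driven as an ndb tasklet. A hedged sketch of one way
# to run it to completion, assuming no decorator is applied at the definition:
#
#   ndb.tasklet(_set_jobs_metrics)().get_result()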
def count(self, request):
  """Counts the number of tasks in a given state."""
  logging.info('%s', request)
  if not request.start:
    raise endpoints.BadRequestException('start (as epoch) is required')
  if not request.end:
    raise endpoints.BadRequestException('end (as epoch) is required')
  try:
    now = utils.utcnow()
    query = task_result.get_result_summaries_query(
        message_conversion.epoch_to_datetime(request.start),
        message_conversion.epoch_to_datetime(request.end),
        'created_ts',
        request.state.name.lower(),
        request.tags)
    count = query.count()
  except ValueError as e:
    raise endpoints.BadRequestException(
        'Inappropriate filter for tasks/count: %s' % e)
  return swarming_rpcs.TasksCount(count=count, now=now)
def _set_jobs_metrics(now):
  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter()
  jobs_counts = defaultdict(lambda: 0)
  jobs_pending_distributions = defaultdict(
      lambda: gae_ts_mon.Distribution(_bucketer))
  jobs_max_pending_durations = defaultdict(lambda: 0.0)
  while (yield query_iter.has_next_async()):
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = extract_job_fields(summary.tags)
    target_fields = dict(TARGET_FIELDS)
    if summary.bot_id:
      target_fields['hostname'] = 'autogen:' + summary.bot_id
    if summary.bot_id and status == 'running':
      jobs_running.set(True, target_fields=target_fields, fields=fields)
    fields['status'] = status
    key = tuple(sorted(fields.iteritems()))
    jobs_counts[key] += 1
    pending_duration = summary.pending_now(now)
    if pending_duration is not None:
      jobs_pending_distributions[key].add(pending_duration.total_seconds())
      jobs_max_pending_durations[key] = max(
          jobs_max_pending_durations[key], pending_duration.total_seconds())
  for key, count in jobs_counts.iteritems():
    jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key))
  for key, distribution in jobs_pending_distributions.iteritems():
    jobs_pending_durations.set(
        distribution, target_fields=TARGET_FIELDS, fields=dict(key))
  for key, val in jobs_max_pending_durations.iteritems():
    jobs_max_pending_duration.set(
        val, target_fields=TARGET_FIELDS, fields=dict(key))
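# The key encoding used above, as a worked example: metric fields dicts are
# not hashable, so they are flattened into a sorted tuple of (name, value)
# pairs to serve as defaultdict keys, then restored with dict(key) when the
# metrics are set. The field names here are hypothetical:
#
#   fields = {'project_id': 'chromium', 'status': 'pending'}
#   key = tuple(sorted(fields.iteritems()))
#   # key == (('project_id', 'chromium'), ('status', 'pending'))
#   assert dict(key) == fields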
def _set_jobs_metrics(payload):
  params = _ShardParams(payload)
  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  jobs_counts = defaultdict(lambda: 0)
  jobs_total = 0
  jobs_pending_distributions = defaultdict(
      lambda: gae_ts_mon.Distribution(_bucketer))
  jobs_max_pending_durations = defaultdict(lambda: 0.0)
  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter(
          produce_cursors=True, start_cursor=params.cursor)
  while query_iter.has_next():
    runtime = (utils.utcnow() - params.start_time).total_seconds()
    if jobs_total >= _JOBS_PER_SHARD or runtime > _REQUEST_TIMEOUT_SEC:
      # Out of budget for this shard: save the cursor and enqueue a
      # continuation task to process the rest of the query.
      params.cursor = query_iter.cursor_after()
      params.task_count += 1
      utils.enqueue_task(
          url='/internal/taskqueue/tsmon/jobs',
          queue_name='tsmon',
          payload=params.json())
      params.task_count -= 1  # For accurate logging below.
      break
    params.count += 1
    jobs_total += 1
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = _extract_job_fields(summary.tags)
    target_fields = dict(_TARGET_FIELDS)
    if summary.bot_id:
      target_fields['hostname'] = 'autogen:' + summary.bot_id
    if summary.bot_id and status == 'running':
      _jobs_running.set(True, target_fields=target_fields, fields=fields)
    fields['status'] = status
    key = tuple(sorted(fields.iteritems()))
    jobs_counts[key] += 1
    pending_duration = summary.pending_now(utils.utcnow())
    if pending_duration is not None:
      jobs_pending_distributions[key].add(pending_duration.total_seconds())
      jobs_max_pending_durations[key] = max(
          jobs_max_pending_durations[key], pending_duration.total_seconds())
  logging.debug(
      '_set_jobs_metrics: task %d started at %s, processed %d jobs (%d total)',
      params.task_count, params.task_start, jobs_total, params.count)
  # Global counts are sharded by task_num and aggregated in queries.
  target_fields = dict(_TARGET_FIELDS)
  target_fields['task_num'] = params.task_count
  for key, count in jobs_counts.iteritems():
    _jobs_active.set(count, target_fields=target_fields, fields=dict(key))
  for key, distribution in jobs_pending_distributions.iteritems():
    _jobs_pending_durations.set(
        distribution, target_fields=target_fields, fields=dict(key))
  for key, val in jobs_max_pending_durations.iteritems():
    _jobs_max_pending_duration.set(
        val, target_fields=target_fields, fields=dict(key))
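# Hedged sketch of the other end of the re-enqueue above: some handler must
# be mapped to /internal/taskqueue/tsmon/jobs and feed the serialized
# _ShardParams payload back into _set_jobs_metrics. The class name and
# webapp2 wiring here are assumptions, not the actual implementation:
#
#   class TsmonJobsTaskHandler(webapp2.RequestHandler):
#     def post(self):
#       _set_jobs_metrics(self.request.body)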
def get(self):
  cursor_str = self.request.get('cursor')
  limit = int(self.request.get('limit', 100))
  sort = self.request.get('sort', self.SORT_CHOICES[0][0])
  state = self.request.get('state', self.STATE_CHOICES[0][0][0])
  counts = self.request.get('counts', '').strip()
  task_tags = [
    line for line in self.request.get('task_tag', '').splitlines() if line
  ]

  if not any(sort == i[0] for i in self.SORT_CHOICES):
    self.abort(400, 'Invalid sort')
  if not any(any(state == i[0] for i in j) for j in self.STATE_CHOICES):
    self.abort(400, 'Invalid state')

  if sort != 'created_ts':
    # Zap all filters in this case to reduce the number of required indexes.
    # Revisit according to user requests.
    state = 'all'

  now = utils.utcnow()
  # "Temporarily" disable the count. This is too slow on the prod server
  # (>10s). The fix is to have the web page do an XHR query to get the values
  # asynchronously.
  counts_future = None
  if counts == 'true':
    counts_future = self._get_counts_future(now)

  try:
    if task_tags:
      # Enforce created_ts when tags are used.
      sort = 'created_ts'
    query = task_result.get_result_summaries_query(
        None, None, sort, state, task_tags)
    tasks, cursor_str = datastore_utils.fetch_page(query, limit, cursor_str)

    # Prefetch the TaskRequests all at once, so that ndb's in-process cache
    # has them instead of fetching them one at a time indirectly when using
    # TaskResultSummary.request_key.get().
    futures = ndb.get_multi_async(t.request_key for t in tasks)

    # Evaluate the counts to print the filtering columns with the associated
    # numbers.
    state_choices = self._get_state_choices(counts_future)
  except ValueError as e:
    self.abort(400, str(e))

  def safe_sum(items):
    return sum(items, datetime.timedelta())

  def avg(items):
    if not items:
      return 0.
    return safe_sum(items) / len(items)

  def median(items):
    if not items:
      return 0.
    middle = len(items) / 2
    if len(items) % 2:
      return items[middle]
    return (items[middle - 1] + items[middle]) / 2

  gen = (t.duration_now(now) for t in tasks)
  durations = sorted(t for t in gen if t is not None)
  gen = (t.pending_now(now) for t in tasks)
  pendings = sorted(t for t in gen if t is not None)
  total_cost_usd = sum(t.cost_usd for t in tasks)
  total_cost_saved_usd = sum(
      t.cost_saved_usd for t in tasks if t.cost_saved_usd)
  # Include the overhead in the total amount of time saved, since it's
  # overhead saved. In theory, t.duration_as_seen_by_server should always be
  # set when t.deduped_from is set, but there are some broken entities in the
  # datastore.
  total_saved = safe_sum(
      t.duration_as_seen_by_server for t in tasks
      if t.deduped_from and t.duration_as_seen_by_server)
  duration_sum = safe_sum(durations)
  total_saved_percent = (
      (100. * total_saved.total_seconds() / duration_sum.total_seconds())
      if duration_sum else 0.)

  try_link = '/tasklist?l=%d' % limit
  if task_tags:
    try_link += '&f=' + '&f='.join(task_tags)
  params = {
    'cursor': cursor_str,
    'duration_average': avg(durations),
    'duration_median': median(durations),
    'duration_sum': duration_sum,
    'has_pending': any(t.is_pending for t in tasks),
    'has_running': any(t.is_running for t in tasks),
    'is_admin': acl.is_admin(),
    'is_privileged_user': acl.is_privileged_user(),
    'limit': limit,
    'now': now,
    'pending_average': avg(pendings),
    'pending_median': median(pendings),
    'pending_sum': safe_sum(pendings),
    'show_footer': bool(pendings or durations),
    'sort': sort,
    'sort_choices': self.SORT_CHOICES,
    'state': state,
    'state_choices': state_choices,
    'task_tag': '\n'.join(task_tags),
    'tasks': tasks,
    'total_cost_usd': total_cost_usd,
    'total_cost_saved_usd': total_cost_saved_usd,
    'total_saved': total_saved,
    'total_saved_percent': total_saved_percent,
    'try_link': try_link,
    'xsrf_token': self.generate_xsrf_token(),
  }
  # TODO(maruel): If admin or if the user is task's .user, show the Cancel
  # button. Do not show otherwise.
  self.response.write(template.render('swarming/user_tasks.html', params))
  # Do not let dangling futures linger around.
  ndb.Future.wait_all(futures)
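# Worked example for the nested stats helpers in get() above, assuming a
# sorted list of timedeltas as produced by the durations/pendings generators:
#
#   items = [datetime.timedelta(seconds=s) for s in (1, 2, 10)]
#   safe_sum([])   # timedelta(0) rather than int 0, keeping types consistent
#   avg(items)     # timedelta(seconds=13) / 3
#   median(items)  # timedelta(seconds=2), the middle element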