def _process_builds(builds, tag_key, entry_limit):
  entry_count = 0
  new_entries = collections.defaultdict(list)

  for b in builds:  # pragma: no branch
    for t in b.tags:
      k, v = buildtags.parse(t)
      if k == tag_key:
        new_entries[v].append([b.bucket_id, b.key.id()])
        entry_count += 1
        if entry_count >= entry_limit:
          break
    if entry_count >= entry_limit:
      break

  logging.info('collected %d entries', entry_count)
  _enqueue_flush_entries(tag_key, new_entries)
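
# Illustrative sketch (not part of the original module): the loop above
# groups [bucket_id, build_id] pairs by tag value. Assuming two hypothetical
# builds in 'chromium/try' that share one 'buildset' tag value, the mapping
# handed to _enqueue_flush_entries would look like:
#
#   new_entries = {
#       'patch/gerrit/chromium-review.googlesource.com/123/4': [
#           ['chromium/try', 8922054662172514000],
#           ['chromium/try', 8922054662172514001],
#       ],
#   }
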
def parse_v1_tags(v1_tags):
  """Parses V1 tags.

  Returns a tuple of:
    v2_tags: list of StringPair
    gitiles_commit: common_pb2.GitilesCommit or None
    gerrit_changes: list of common_pb2.GerritChange.
  """
  v2_tags = []
  gitiles_commit = None
  gitiles_ref = None
  gerrit_changes = []

  for t in v1_tags:
    key, value = buildtags.parse(t)

    if key == buildtags.GITILES_REF_KEY:
      gitiles_ref = value
      continue

    if key == buildtags.BUILDSET_KEY:
      commit = buildtags.parse_gitiles_commit_buildset(value)
      if commit:
        if gitiles_commit:  # pragma: no cover
          raise errors.InvalidInputError('multiple gitiles commit')
        gitiles_commit = commit
        continue

      cl = buildtags.parse_gerrit_change_buildset(value)
      if cl:
        gerrit_changes.append(cl)
        continue

    v2_tags.append(common_pb2.StringPair(key=key, value=value))

  if gitiles_commit and not gitiles_commit.ref:
    gitiles_commit.ref = gitiles_ref or 'refs/heads/master'

  return v2_tags, gitiles_commit, gerrit_changes
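
# Minimal usage sketch (illustrative; the tag values are made up). A
# 'buildset' tag carrying a gitiles commit is lifted into gitiles_commit,
# the 'gitiles_ref' tag backfills its missing ref, and unrecognized tags
# pass through as StringPairs:
#
#   v2_tags, commit, changes = parse_v1_tags([
#       'buildset:commit/gitiles/chromium.googlesource.com/src/+/' + 'a' * 40,
#       'gitiles_ref:refs/heads/main',
#       'builder:linux-rel',
#   ])
#   # commit.ref == 'refs/heads/main'
#   # v2_tags == [common_pb2.StringPair(key='builder', value='linux-rel')]
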
        getter=lambda b: b.canary,
    ),
    'cancelation_reason': str_attr_field('cancelation_reason'),
    'failure_reason': str_attr_field('failure_reason'),
    'result': str_attr_field('result'),
    'status': str_attr_field('status_legacy'),
    'user_agent': BuildMetricField(
        value_type=str,
        field_type=gae_ts_mon.StringField,
        # Parse the build's tags into a dict and pull out 'user_agent'.
        getter=lambda b: (
            dict(buildtags.parse(t) for t in b.tags).get('user_agent')),
    ),
}

_METRIC_PREFIX_PROD = 'buildbucket/builds/'
_METRIC_PREFIX_EXPERIMENTAL = 'buildbucket/builds-experimental/'

# Maximum number of concurrent counting/latency queries.
_CONCURRENT_QUERY_LIMIT = 100

# Geometric bucketers for distribution metrics. Assuming ts_mon's default of
# 100 finite buckets, the largest finite bucket boundary is
# growth_factor**100, e.g. 10**(0.05*100) = 10**5 for BUCKETER_24_HR
# (roughly 28 hours when values are in seconds) and 10**3.1 ~= 1259 for
# BUCKETER_1K.
BUCKETER_24_HR = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05)
BUCKETER_48_HR = gae_ts_mon.GeometricBucketer(growth_factor=10**0.053)
BUCKETER_5_SEC = gae_ts_mon.GeometricBucketer(growth_factor=10**0.0374)
BUCKETER_1K = gae_ts_mon.GeometricBucketer(growth_factor=10**0.031)


def _fields_for(build, field_names):
@ndb.tasklet
def _tag_index_search_async(q):
  """Searches for builds using TagIndex entities.

  For args doc, see search().

  Assumes:
  - arguments are valid
  - if bool(q.bucket_ids), permissions are checked.

  Raises:
    errors.TagIndexIncomplete if the tag index is incomplete and cannot be
    used.
  """
  assert q.tags
  assert not q.bucket_ids or isinstance(q.bucket_ids, set)

  # Choose a tag to search by.
  all_indexed_tags = indexed_tags(q.tags)
  assert all_indexed_tags
  indexed_tag = all_indexed_tags[0]  # choose the most selective tag.
  indexed_tag_key = buildtags.parse(indexed_tag)[0]

  # Exclude the indexed tag from the tag filter.
  q = q.copy()
  q.tags = q.tags[:]
  q.tags.remove(indexed_tag)

  # Determine the build id range we are considering.
  # id_low is inclusive, id_high is exclusive.
  id_low, id_high = q.get_create_time_order_build_id_range()
  id_low = 0 if id_low is None else id_low
  id_high = (1 << 64) - 1 if id_high is None else id_high
  if q.start_cursor:
    # The cursor is a minimum build id, exclusive. Such a cursor is resilient
    # to duplicates and to additions of index entries at the beginning or end.
    assert RE_TAG_INDEX_SEARCH_CURSOR.match(q.start_cursor)
    min_id_exclusive = int(q.start_cursor[len('id>'):])
    id_low = max(id_low, min_id_exclusive + 1)
  if id_low >= id_high:
    raise ndb.Return([], None)

  # Load index entries and put them into a min-heap, sorted by build_id.
  entry_heap = []  # tuples (build_id, TagIndexEntry).
  indexes = yield ndb.get_multi_async(TagIndex.all_shard_keys(indexed_tag))
  indexes = [idx for idx in indexes if idx]
  yield _populate_tag_index_entry_bucket_id(indexes)
  for idx in indexes:
    if idx.permanently_incomplete:
      raise errors.TagIndexIncomplete(
          'TagIndex(%s) is incomplete' % idx.key.id())
    for e in idx.entries:
      if id_low <= e.build_id < id_high:
        entry_heap.append((e.build_id, e))
  if not entry_heap:
    raise ndb.Return([], None)
  heapq.heapify(entry_heap)

  # If buckets were not specified explicitly, permissions were not checked
  # earlier. In this case, check permissions for each build.
  check_permissions = not q.bucket_ids
  has_access_cache = {}

  # scalar_filters maps a name of a model.Build attribute to a filter value.
  # Applies only to non-repeated fields.
  scalar_filters = [
      ('result', q.result),
      ('failure_reason', q.failure_reason),
      ('cancelation_reason', q.cancelation_reason),
      ('created_by', q.created_by),
      ('retry_of', q.retry_of),
      ('canary', q.canary),
      # TODO(crbug.com/851036): use e.bucket_id to filter by project before
      # fetching a build.
      ('project', q.project),
  ]
  scalar_filters = [(a, v) for a, v in scalar_filters if v is not None]
  if not q.status_is_legacy:
    scalar_filters.append(('status', q.status))
  elif q.status == StatusFilter.INCOMPLETE:
    scalar_filters.append(('incomplete', True))
  elif q.status is not None:
    scalar_filters.append(
        ('status_legacy', model.BuildStatus.lookup_by_number(q.status.number)))

  # Find the builds.
  result = []  # ordered by build id, ascending.
  last_considered_entry = None
  skipped_entries = 0
  inconsistent_entries = 0
  eof = False
  while len(result) < q.max_builds:
    fetch_count = q.max_builds - len(result)
    entries_to_fetch = []  # ordered by build id, ascending.
    while entry_heap:
      _, e = heapq.heappop(entry_heap)
      prev = last_considered_entry
      last_considered_entry = e
      if prev and prev.build_id == e.build_id:
        # Tolerate duplicates.
        continue
      # If we filter by bucket, check it here without fetching the build.
      # This is not a security check.
      if q.bucket_ids and e.bucket_id not in q.bucket_ids:
        continue
      if check_permissions:
        has = has_access_cache.get(e.bucket_id)
        if has is None:
          has = yield user.can_search_builds_async(e.bucket_id)
          has_access_cache[e.bucket_id] = has
        if not has:
          continue
      entries_to_fetch.append(e)
      if len(entries_to_fetch) >= fetch_count:
        break

    if not entries_to_fetch:
      eof = True
      break

    builds = yield ndb.get_multi_async(
        ndb.Key(model.Build, e.build_id) for e in entries_to_fetch)
    for e, b in zip(entries_to_fetch, builds):
      # Check for inconsistent entries.
      if not (b and b.bucket_id == e.bucket_id and indexed_tag in b.tags):
        logging.warning('entry with build_id %d is inconsistent', e.build_id)
        inconsistent_entries += 1
        continue
      # Check user-supplied filters.
      if any(getattr(b, a) != v for a, v in scalar_filters):
        skipped_entries += 1
        continue
      if not _between(b.create_time, q.create_time_low, q.create_time_high):
        continue  # pragma: no cover
      if any(t not in b.tags for t in q.tags):
        skipped_entries += 1
        continue
      if b.experimental and not q.include_experimental:
        continue
      if q.builder and b.proto.builder.builder != q.builder:
        continue
      result.append(b)

  metrics.TAG_INDEX_SEARCH_SKIPPED_BUILDS.add(
      skipped_entries, fields={'tag': indexed_tag_key})
  metrics.TAG_INDEX_INCONSISTENT_ENTRIES.add(
      inconsistent_entries, fields={'tag': indexed_tag_key})

  # Return the results.
  next_cursor = None
  if not eof and last_considered_entry:
    next_cursor = 'id>%d' % last_considered_entry.build_id
  raise ndb.Return(result, next_cursor)
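
# Pagination sketch (illustrative; assumes a caller that is itself an ndb
# tasklet). The cursor is just 'id>' plus the last considered build id, so a
# follow-up query resumes strictly after it and tolerates index entries being
# duplicated or appended in the meantime:
#
#   builds, cursor = yield _tag_index_search_async(q)
#   if cursor:  # e.g. 'id>8922054662172514000'
#     q2 = q.copy()
#     q2.start_cursor = cursor
#     more_builds, cursor = yield _tag_index_search_async(q2)
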
def tags_to_protos(self, dest):
  """Adds non-hidden self.tags to a repeated StringPair container."""
  for t in self.tags:
    k, v = buildtags.parse(t)
    if k not in buildtags.HIDDEN_TAG_KEYS:
      dest.add(key=k, value=v)
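
# Usage sketch (illustrative; assumes a model.Build instance 'b' and the
# buildbucket Build proto, whose repeated 'tags' field accepts StringPairs):
#
#   msg = build_pb2.Build()
#   b.tags_to_protos(msg.tags)
#   # msg.tags now holds one StringPair per non-hidden tag of b.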