def test_numeric_tag_value_paginator(self):
    from sentry.tagstore.types import TagValue

    # Both the unfiltered lookup and the fuzzy numeric query (30 is within
    # the fuzzy distance of 29) should surface the same single value.
    expected = [
        TagValue(
            key="stack.lineno",
            value="29",
            times_seen=2,
            first_seen=self.now - timedelta(seconds=2),
            last_seen=self.now - timedelta(seconds=1),
        )
    ]
    for search in (None, "30"):
        paginator = self.ts.get_tag_value_paginator(
            self.proj1.id, self.proj1env1.id, "stack.lineno", query=search
        )
        assert list(paginator.get_result(10)) == expected
def test_get_tag_value_paginator(self):
    from sentry.tagstore.types import TagValue

    user1 = TagValue(
        key="sentry:user",
        value="id:user1",
        times_seen=2,
        first_seen=self.now - timedelta(seconds=2),
        last_seen=self.now - timedelta(seconds=1),
    )
    user2 = TagValue(
        key="sentry:user",
        value="id:user2",
        times_seen=1,
        first_seen=self.now - timedelta(seconds=2),
        last_seen=self.now - timedelta(seconds=2),
    )

    # Without a query both users come back.
    unfiltered = self.ts.get_tag_value_paginator(
        self.proj1.id, self.proj1env1.id, "sentry:user"
    ).get_result(10)
    assert list(unfiltered) == [user1, user2]

    # A substring query narrows the result to the matching user only.
    filtered = self.ts.get_tag_value_paginator(
        self.proj1.id, self.proj1env1.id, "sentry:user", query="user1"
    ).get_result(10)
    assert list(filtered) == [user1]
def test_get_tag_value_paginator(self):
    from sentry.tagstore.types import TagValue

    # Expected aggregates for the two users seeded by the fixture.
    first_user = TagValue(
        key='sentry:user',
        value='id:user1',
        times_seen=2,
        first_seen=self.now - timedelta(seconds=2),
        last_seen=self.now - timedelta(seconds=1),
    )
    second_user = TagValue(
        key='sentry:user',
        value='id:user2',
        times_seen=1,
        first_seen=self.now - timedelta(seconds=2),
        last_seen=self.now - timedelta(seconds=2),
    )

    all_values = self.ts.get_tag_value_paginator(
        self.proj1.id,
        self.proj1env1.id,
        'sentry:user',
    ).get_result(10)
    assert list(all_values) == [first_user, second_user]

    matching = self.ts.get_tag_value_paginator(
        self.proj1.id,
        self.proj1env1.id,
        'sentry:user',
        query='user1',
    ).get_result(10)
    assert list(matching) == [first_user]
def test_get_tag_value_paginator_with_dates(self):
    """Values inside [start, end] are returned; an older window yields nothing.

    Fix: `day_ago` was redundantly recomputed between the two assertions;
    the duplicate assignment is removed (value is identical).
    """
    from sentry.tagstore.types import TagValue

    day_ago = self.now - timedelta(days=1)
    two_days_ago = self.now - timedelta(days=2)

    # Window covering the seeded events returns both users.
    assert list(
        self.ts.get_tag_value_paginator(
            self.proj1.id, self.proj1env1.id, "sentry:user", start=day_ago, end=self.now
        ).get_result(10)
    ) == [
        TagValue(
            key="sentry:user",
            value="id:user1",
            times_seen=2,
            first_seen=self.now - timedelta(seconds=2),
            last_seen=self.now - timedelta(seconds=1),
        ),
        TagValue(
            key="sentry:user",
            value="id:user2",
            times_seen=1,
            first_seen=self.now - timedelta(seconds=2),
            last_seen=self.now - timedelta(seconds=2),
        ),
    ]

    # A window that ends before the events occurred must be empty.
    assert (
        list(
            self.ts.get_tag_value_paginator(
                self.proj1.id, self.proj1env1.id, "sentry:user", start=two_days_ago, end=day_ago
            ).get_result(10)
        )
        == []
    )
def _get_tag_values_for_semver_build(self, projects, environments, build):
    """Paginate distinct release build codes matching a (possibly partial) query."""
    from sentry.api.paginator import SequencePaginator

    # Treat the query as a prefix: guarantee a trailing wildcard.
    pattern = build or ""
    if not pattern.endswith("*"):
        pattern = pattern + "*"

    org_id = Project.objects.filter(id=projects[0]).values_list(
        "organization_id", flat=True
    )[0]
    release_qs = Release.objects.filter_by_semver_build(org_id, "exact", pattern, projects)
    if environments:
        env_release_ids = ReleaseEnvironment.objects.filter(
            environment_id__in=environments
        ).values_list("release_id", flat=True)
        release_qs = release_qs.filter(id__in=env_release_ids)

    build_codes = (
        release_qs.values_list("build_code", flat=True).distinct().order_by("build_code")[:1000]
    )
    entries = [
        (index, TagValue(SEMVER_BUILD_ALIAS, code, None, None, None))
        for index, code in enumerate(build_codes)
    ]
    return SequencePaginator(entries)
def _get_tag_values_for_release_stages(self, projects, environments, query):
    """Paginate release versions in the release stage given by `query`."""
    from sentry.api.paginator import SequencePaginator

    org_id = Project.objects.filter(id=projects[0]).values_list(
        "organization_id", flat=True
    )[0]
    stage_releases = Release.objects.filter_by_stage(
        org_id,
        "=",
        query,
        project_ids=projects,
        environments=environments,
    )
    if environments:
        env_release_ids = ReleaseEnvironment.objects.filter(
            environment_id__in=environments
        ).values_list("release_id", flat=True)
        stage_releases = stage_releases.filter(id__in=env_release_ids)

    version_list = stage_releases.order_by("version").values_list("version", flat=True)[:1000]
    entries = [
        (index, TagValue(RELEASE_STAGE_ALIAS, version, None, None, None))
        for index, version in enumerate(version_list)
    ]
    return SequencePaginator(entries)
def __get_tag_value(self, project_id, group_id, environment_id, key, value):
    """Fetch aggregate stats for a single tag value from Snuba.

    Returns a TagValue, or a GroupTagValue when ``group_id`` is given.
    Raises TagValueNotFound / GroupTagValueNotFound when no events match.
    """
    start, end = self.get_time_range()
    # Tags live in a nested Snuba column; address one key directly.
    tag = u'tags[{}]'.format(key)
    filters = {
        'project_id': [project_id],
        'environment': [environment_id],
    }
    if group_id is not None:
        filters['issue'] = [group_id]
    conditions = [[tag, '=', value]]
    aggregations = [
        ['count()', '', 'times_seen'],
        ['min', SEEN_COLUMN, 'first_seen'],
        ['max', SEEN_COLUMN, 'last_seen'],
    ]
    data = snuba.query(start, end, [], conditions, filters, aggregations,
                       referrer='tagstore.__get_tag_value')
    if not data['times_seen'] > 0:
        # Nothing seen: raise the exception flavor matching the scope asked for.
        raise TagValueNotFound if group_id is None else GroupTagValueNotFound
    else:
        data.update({
            'key': key,
            'value': value,
        })
        if group_id is None:
            return TagValue(**fix_tag_value_data(data))
        else:
            return GroupTagValue(group_id=group_id, **fix_tag_value_data(data))
def get_release_tags(self, organization_id, project_ids, environment_id, versions):
    """Return a set of TagValue for 'sentry:release' limited to `versions`.

    Aggregates times_seen / first_seen / last_seen per (project, version).
    """
    filters = {"project_id": project_ids}
    if environment_id:
        filters["environment"] = [environment_id]
    # NB we add release as a condition rather than a filter because
    # this method is already dealing with version strings rather than
    # release ids which would need to be translated by the snuba util.
    tag = "sentry:release"
    col = f"tags[{tag}]"
    conditions = [[col, "IN", versions], DEFAULT_TYPE_CONDITION]
    aggregations = [
        ["count()", "", "times_seen"],
        ["min", SEEN_COLUMN, "first_seen"],
        ["max", SEEN_COLUMN, "last_seen"],
    ]
    # Narrow the scanned time range to when these versions could have events.
    start = self.get_min_start_date(organization_id, project_ids, environment_id, versions)
    result = snuba.query(
        dataset=Dataset.Events,
        start=start,
        groupby=["project_id", col],
        conditions=conditions,
        filter_keys=filters,
        aggregations=aggregations,
        orderby="-times_seen",
        referrer="tagstore.get_release_tags",
    )
    # Result is nested {project_id: {version: data}}; flatten into TagValues.
    values = []
    for project_data in result.values():
        for value, data in project_data.items():
            values.append(TagValue(key=tag, value=value, **fix_tag_value_data(data)))
    return set(values)
def _get_tag_values_for_releases_across_all_datasets(self, projects, environments, query):
    """Paginate release versions (optionally prefix-filtered) from the Release model."""
    from sentry.api.paginator import SequencePaginator

    org_id = Project.objects.filter(id=projects[0]).values_list(
        "organization_id", flat=True
    )[0]
    releases = Release.objects.filter(organization_id=org_id)
    if projects:
        project_release_ids = ReleaseProject.objects.filter(
            project_id__in=projects
        ).values_list("release_id", flat=True)
        releases = releases.filter(id__in=project_release_ids)
    if environments:
        env_release_ids = ReleaseEnvironment.objects.filter(
            environment_id__in=environments
        ).values_list("release_id", flat=True)
        releases = releases.filter(id__in=env_release_ids)
    if query:
        releases = releases.filter(version__startswith=query)

    version_list = releases.order_by("version").values_list("version", flat=True)[:1000]
    entries = [
        (index, TagValue(RELEASE_ALIAS, version, None, None, None))
        for index, version in enumerate(version_list)
    ]
    return SequencePaginator(entries)
def get_release_tags(self, project_ids, environment_id, versions):
    """Return a set of TagValue for 'sentry:release' limited to `versions`.

    Aggregates times_seen / first_seen / last_seen per (project, version).
    """
    start, end = self.get_time_range()
    filters = {
        'project_id': project_ids,
        'environment': [environment_id],
    }
    # NB we add release as a condition rather than a filter because
    # this method is already dealing with version strings rather than
    # release ids which would need to be translated by the snuba util.
    tag = 'sentry:release'
    col = u'tags[{}]'.format(tag)
    conditions = [[col, 'IN', versions]]
    aggregations = [
        ['count()', '', 'times_seen'],
        ['min', SEEN_COLUMN, 'first_seen'],
        ['max', SEEN_COLUMN, 'last_seen'],
    ]
    result = snuba.query(start, end, ['project_id', col], conditions, filters,
                         aggregations, referrer='tagstore.get_release_tags')
    # Result is nested {project_id: {version: data}}; flatten into TagValues.
    values = []
    for project_data in six.itervalues(result):
        for value, data in six.iteritems(project_data):
            values.append(
                TagValue(key=tag, value=value, **fix_tag_value_data(data)))
    return set(values)
def __get_tag_value(self, project_id, group_id, environment_id, key, value):
    """Fetch aggregate stats for a single tag value from Snuba.

    Returns a TagValue, or a GroupTagValue when ``group_id`` is given.
    Raises TagValueNotFound / GroupTagValueNotFound when no events match.
    """
    # Tags live in a nested Snuba column; address one key directly.
    tag = u"tags[{}]".format(key)
    filters = {"project_id": get_project_list(project_id)}
    if environment_id:
        filters["environment"] = [environment_id]
    if group_id is not None:
        filters["issue"] = [group_id]
    conditions = [[tag, "=", value]]
    aggregations = [
        ["count()", "", "times_seen"],
        ["min", SEEN_COLUMN, "first_seen"],
        ["max", SEEN_COLUMN, "last_seen"],
    ]
    data = snuba.query(
        conditions=conditions,
        filter_keys=filters,
        aggregations=aggregations,
        referrer="tagstore.__get_tag_value",
    )
    if not data["times_seen"] > 0:
        # Nothing seen: raise the exception flavor matching the scope asked for.
        raise TagValueNotFound if group_id is None else GroupTagValueNotFound
    else:
        data.update({"key": key, "value": value})
        if group_id is None:
            return TagValue(**fix_tag_value_data(data))
        else:
            return GroupTagValue(group_id=group_id, **fix_tag_value_data(data))
def get_release_tags(self, project_ids, environment_id, versions):
    """Return a set of TagValue for 'sentry:release' limited to `versions`.

    Aggregates times_seen / first_seen / last_seen per (project, version).
    """
    filters = {"project_id": project_ids}
    if environment_id:
        filters["environment"] = [environment_id]
    # NB we add release as a condition rather than a filter because
    # this method is already dealing with version strings rather than
    # release ids which would need to be translated by the snuba util.
    tag = "sentry:release"
    col = u"tags[{}]".format(tag)
    conditions = [[col, "IN", versions]]
    aggregations = [
        ["count()", "", "times_seen"],
        ["min", SEEN_COLUMN, "first_seen"],
        ["max", SEEN_COLUMN, "last_seen"],
    ]
    result = snuba.query(
        groupby=["project_id", col],
        conditions=conditions,
        filter_keys=filters,
        aggregations=aggregations,
        referrer="tagstore.get_release_tags",
    )
    # Result is nested {project_id: {version: data}}; flatten into TagValues.
    values = []
    for project_data in six.itervalues(result):
        for value, data in six.iteritems(project_data):
            values.append(
                TagValue(key=tag, value=value, **fix_tag_value_data(data)))
    return set(values)
def create_tag_objects(keys, total_count, top_values):
    """Group Snuba top-value rows into TagKey objects, one per tag key.

    Fix: the TagKey construction was duplicated verbatim in two places;
    it is factored into a single create-on-first-use helper.

    :param keys: iterable of tag keys that must all appear in the output,
                 even when they have no top values.
    :param total_count: total event count attached to every TagKey.
    :param top_values: Snuba rows with 'tags_key', 'tags_value', 'count',
                       'first_seen' and 'last_seen'.
    :returns: the TagKey values, ordered by first appearance.
    """
    tag_keys_dict = OrderedDict()

    def _tag_key(key):
        # Create on first use so ordering follows first appearance.
        if key not in tag_keys_dict:
            tag_keys_dict[key] = TagKey(
                key=key,
                top_values=[],
                count=total_count,
            )
        return tag_keys_dict[key]

    for top_value in top_values:
        _tag_key(top_value['tags_key']).top_values.append(
            TagValue(
                key=top_value['tags_key'],
                value=top_value['tags_value'],
                times_seen=top_value['count'],
                first_seen=top_value['first_seen'],
                last_seen=top_value['last_seen'],
            ))

    # Add categories with no values
    for key in keys:
        _tag_key(key)

    return tag_keys_dict.values()
def get_tag_value_paginator_for_projects(self, projects, environments, key, start, end,
                                         query=None, order_by='-last_seen'):
    """Return a SequencePaginator of TagValue for `key` across `projects`.

    Only '-last_seen' ordering is supported; paginator scores are the
    ordering field as milliseconds since epoch.
    """
    from sentry.api.paginator import SequencePaginator
    if not order_by == '-last_seen':
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)

    conditions = []
    if snuba_key in BLACKLISTED_COLUMNS:
        # Column cannot be queried directly; fall back to the nested tags map.
        snuba_key = 'tags[%s]' % (key, )
    if query:
        # Substring match against the tag value.
        conditions.append([snuba_key, 'LIKE', u'%{}%'.format(query)])
    else:
        conditions.append([snuba_key, '!=', ''])

    filters = {
        'project_id': projects,
    }
    if environments:
        filters['environment'] = environments

    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ['count()', '', 'times_seen'],
            ['min', 'timestamp', 'first_seen'],
            ['max', 'timestamp', 'last_seen'],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer='tagstore.get_tag_value_paginator_for_projects',
    )

    tag_values = [
        TagValue(key=key, value=value, **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith('-')
    score_field = order_by.lstrip('-')
    # Score each value by its ordering field so the paginator can cursor.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc)
def get_tag_value_paginator_for_projects(self, projects, environments, key, start=None,
                                         end=None, query=None, order_by="-last_seen"):
    """Return a SequencePaginator of TagValue for `key` across `projects`.

    Only '-last_seen' ordering is supported; paginator scores are the
    ordering field as milliseconds since epoch.
    """
    from sentry.api.paginator import SequencePaginator
    if not order_by == "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)

    conditions = []
    if snuba_key in BLACKLISTED_COLUMNS:
        # Column cannot be queried directly; fall back to the nested tags map.
        snuba_key = "tags[%s]" % (key, )
    if query:
        # Substring match against the tag value.
        conditions.append([snuba_key, "LIKE", u"%{}%".format(query)])
    else:
        conditions.append([snuba_key, "!=", ""])

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    tag_values = [
        TagValue(key=key, value=value, **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    # Score each value by its ordering field so the paginator can cursor.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc,
    )
def test_query(self):
    user = self.create_user()
    seen_at = datetime(2018, 1, 1)
    value = TagValue(
        key="sentry:user",
        value="username:ted",
        times_seen=1,
        first_seen=seen_at,
        last_seen=seen_at,
    )
    serialized = serialize(value, user, serializer=UserTagValueSerializer(project_id=1))
    # The raw value is preserved, but the search query redacts the username.
    assert serialized["value"] == "username:ted"
    assert serialized["query"] == 'user.username:"******"'
def test_with_user(self):
    user = self.create_user()
    seen_at = datetime(2018, 1, 1)
    value = TagValue(
        key='sentry:user',
        value='username:ted',
        times_seen=1,
        first_seen=seen_at,
        last_seen=seen_at,
    )
    serialized = serialize(value, user)
    # 'sentry:user' is exposed under its public alias with a friendly name.
    assert serialized['key'] == 'user'
    assert serialized['value'] == 'username:ted'
    assert serialized['name'] == 'ted'
def test_release(self):
    user = self.create_user()
    sha = 'df84bccbb23ca15f2868be1f2a5f7c7a6464fadd'
    seen_at = datetime(2018, 1, 1)
    value = TagValue(
        key='sentry:release',
        value=sha,
        times_seen=1,
        first_seen=seen_at,
        last_seen=seen_at,
    )
    serialized = serialize(value, user)
    # Release values keep their full value but display a short-sha name.
    assert serialized['key'] == 'release'
    assert serialized['value'] == sha
    assert serialized['name'] == 'df84bcc'
def get_tag_value_paginator(self, project_id, environment_id, key, query=None,
                            order_by='-last_seen'):
    """Return a SequencePaginator of TagValue for `key` in one project.

    Only '-last_seen' ordering is supported; paginator scores are the
    ordering field as milliseconds since epoch.
    """
    from sentry.api.paginator import SequencePaginator
    if not order_by == '-last_seen':
        raise ValueError("Unsupported order_by: %s" % order_by)

    conditions = []
    if query:
        # Substring match against the tag value.
        conditions.append(['tags_value', 'LIKE', u'%{}%'.format(query)])

    start, end = self.get_time_range()
    filters = {
        'project_id': [project_id],
        'tags_key': [key],
    }
    if environment_id:
        filters['environment'] = [environment_id]

    results = snuba.query(
        start=start,
        end=end,
        groupby=['tags_value'],
        filter_keys=filters,
        aggregations=[
            ['count()', '', 'times_seen'],
            ['min', 'timestamp', 'first_seen'],
            ['max', 'timestamp', 'last_seen'],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        referrer='tagstore.get_tag_value_paginator',
    )

    tag_values = [
        TagValue(key=key, value=value, **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith('-')
    score_field = order_by.lstrip('-')
    # Score each value by its ordering field so the paginator can cursor.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc)
def test_query(self):
    user = self.create_user()
    seen_at = datetime(2018, 1, 1)
    value = TagValue(
        key='sentry:user',
        value='username:ted',
        times_seen=1,
        first_seen=seen_at,
        last_seen=seen_at,
    )
    serialized = serialize(value, user, serializer=UserTagValueSerializer(project_id=1))
    # The generated search query mirrors the username value.
    assert serialized['value'] == 'username:ted'
    assert serialized['query'] == 'user.username:ted'
def test_release(self):
    user = self.create_user()
    sha = "df84bccbb23ca15f2868be1f2a5f7c7a6464fadd"
    seen_at = datetime(2018, 1, 1)
    value = TagValue(
        key="sentry:release",
        value=sha,
        times_seen=1,
        first_seen=seen_at,
        last_seen=seen_at,
    )
    serialized = serialize(value, user)
    # Release values are shown in full, and no search query is emitted.
    assert serialized["key"] == "release"
    assert serialized["value"] == sha
    assert serialized["name"] == sha
    assert "query" not in serialized
def test_with_user(self):
    user = self.create_user()
    seen_at = datetime(2018, 1, 1)
    value = TagValue(
        key="sentry:user",
        value="username:ted",
        times_seen=1,
        first_seen=seen_at,
        last_seen=seen_at,
    )
    serialized = serialize(value, user)
    # Public alias + friendly name; the search query redacts the username.
    assert serialized["key"] == "user"
    assert serialized["value"] == "username:ted"
    assert serialized["name"] == "ted"
    assert serialized["query"] == 'user.username:"******"'
def _create_tag_key_tag_value_objects(self, key, data):
    """Build a TagKey whose top_values are the TagValue rows in `data`."""
    top_values = [
        TagValue(
            key=key,
            value=row['value'],
            times_seen=row['count'],
            last_seen=row['last_seen'],
            first_seen=row['first_seen'],
        )
        for row in data
    ]
    # Total event count across every value of this key.
    total_seen = sum(row['count'] for row in data)
    return TagKey(
        key=key,
        values_seen=len(top_values),
        count=total_seen,
        top_values=top_values,
    )
def _get_tag_values_for_semver_package(self, projects, environments, package):
    """Paginate distinct semver package names matching `package`."""
    from sentry.api.paginator import SequencePaginator

    search = package or ""
    org_id = Project.objects.filter(id=projects[0]).values_list(
        "organization_id", flat=True
    )[0]
    release_qs = self._get_semver_versions_for_package(projects, org_id, search)
    if environments:
        env_release_ids = ReleaseEnvironment.objects.filter(
            environment_id__in=environments
        ).values_list("release_id", flat=True)
        release_qs = release_qs.filter(id__in=env_release_ids)

    package_names = (
        release_qs.values_list("package", flat=True).distinct().order_by("package")[:1000]
    )
    entries = [
        (index, TagValue(SEMVER_PACKAGE_ALIAS, name, None, None, None))
        for index, name in enumerate(package_names)
    ]
    return SequencePaginator(entries)
def run_test(self, query, expected_versions, environment=None, project=None):
    """Assert the paginator returns exactly `expected_versions` for `query`."""
    if project is None:
        project = self.project
    env_id = environment.id if environment else None
    expected = [
        TagValue(key=self.KEY, value=version, times_seen=None, first_seen=None, last_seen=None)
        for version in expected_versions
    ]
    paginator = self.ts.get_tag_value_paginator(project.id, env_id, self.KEY, query=query)
    assert list(paginator.get_result(10)) == expected
def run_test(self, query, expected_releases, environment=None, project=None):
    """Assert the release-stage paginator returns exactly `expected_releases`."""
    if project is None:
        project = self.project
    env_id = environment.id if environment else None
    expected = [
        TagValue(
            key=RELEASE_STAGE_ALIAS,
            value=release.version,
            times_seen=None,
            first_seen=None,
            last_seen=None,
        )
        for release in expected_releases
    ]
    paginator = self.ts.get_tag_value_paginator(
        project.id, env_id, RELEASE_STAGE_ALIAS, query=query
    )
    assert list(paginator.get_result(10)) == expected
def get_tag_value_paginator_for_projects(self, projects, environments, key, start=None,
                                         end=None, query=None, order_by="-last_seen"):
    """Return a SequencePaginator of TagValue for `key` across `projects`.

    Special-cases transaction_status (name<->code mapping), fuzzy numeric
    keys, and the project alias (id<->slug mapping). Only '-last_seen'
    ordering is supported; scores are the ordering field in milliseconds.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    dataset = Dataset.Events
    snuba_key = snuba.get_snuba_column_name(key)
    if snuba_key.startswith("tags["):
        # Not a real events column; see if Discover knows it as one.
        snuba_key = snuba.get_snuba_column_name(key, dataset=Dataset.Discover)
        if not snuba_key.startswith("tags["):
            dataset = Dataset.Discover

    conditions = []
    # transaction status needs a special case so that the user interacts with the names and not codes
    transaction_status = snuba_key == "transaction_status"
    if transaction_status:
        conditions.append([
            snuba_key,
            "IN",
            # Here we want to use the status codes during filtering,
            # but want to do this with names that include our query
            [
                span_key
                for span_key, value in six.iteritems(SPAN_STATUS_CODE_TO_NAME)
                if (query and query in value) or (not query)
            ],
        ])
    elif key in FUZZY_NUMERIC_KEYS:
        # Numeric keys match a +/- FUZZY_NUMERIC_DISTANCE window around the query.
        converted_query = int(query) if query is not None and query.isdigit() else None
        if converted_query is not None:
            conditions.append([snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE])
            conditions.append([snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE])
    elif key == PROJECT_ALIAS:
        # Resolve matching project slugs to ids and query by project_id.
        project_filters = {
            "id__in": projects,
        }
        if query:
            project_filters["slug__icontains"] = query
        project_queryset = Project.objects.filter(**project_filters).values("id", "slug")
        project_slugs = {project["id"]: project["slug"] for project in project_queryset}
        if project_queryset.exists():
            projects = [project["id"] for project in project_queryset]
            snuba_key = "project_id"
            dataset = Dataset.Discover
    else:
        if snuba_key in BLACKLISTED_COLUMNS:
            # Column cannot be queried directly; fall back to the nested tags map.
            snuba_key = "tags[%s]" % (key, )
        if query:
            conditions.append([snuba_key, "LIKE", u"%{}%".format(query)])
        else:
            conditions.append([snuba_key, "!=", ""])

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    results = snuba.query(
        dataset=dataset,
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    # With transaction_status we need to map the ids back to their names
    if transaction_status:
        results = OrderedDict([
            (SPAN_STATUS_CODE_TO_NAME[result_key], data)
            for result_key, data in six.iteritems(results)
        ])
    # With project names we map the ids back to the project slugs
    elif key == PROJECT_ALIAS:
        results = OrderedDict([(project_slugs[value], data)
                               for value, data in six.iteritems(results)])

    tag_values = [
        TagValue(key=key, value=six.text_type(value), **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    # Score each value by its ordering field so the paginator can cursor.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc,
    )
def _get_tag_values_for_semver(
    self,
    projects: Sequence[int],
    environments: Optional[Sequence[str]],
    query: Optional[str],
):
    """Paginate semver release versions matching `query` for autocomplete."""
    from sentry.api.paginator import SequencePaginator

    query = query if query else ""
    organization_id = Project.objects.filter(id=projects[0]).values_list(
        "organization_id", flat=True
    )[0]

    if query and "@" not in query and re.search(r"[^\d.\*]", query):
        # Handle searching just on package
        include_package = True
        versions = self._get_semver_versions_for_package(projects, organization_id, query)
    else:
        include_package = "@" in query
        query = query.replace("*", "")
        if "@" in query:
            versions = Release.objects.filter(version__startswith=query)
        else:
            # No package entered: match the query against the version part
            # (everything after the '@' separator).
            versions = Release.objects.filter(version__contains="@" + query)

    if projects:
        versions = versions.filter(
            id__in=ReleaseProject.objects.filter(project_id__in=projects).values_list(
                "release_id", flat=True
            )
        )
    if environments:
        versions = versions.filter(
            id__in=ReleaseEnvironment.objects.filter(
                environment_id__in=environments
            ).values_list("release_id", flat=True)
        )

    # Sort newest-first by flipping the semver sort columns.
    order_by = map(_flip_field_sort, Release.SEMVER_COLS + ["package"])
    versions = (
        versions.filter_to_semver()
        .annotate_prerelease_column()
        .order_by(*order_by)
        .values_list("version", flat=True)[:1000]
    )

    seen = set()
    formatted_versions = []

    # We want to format versions here in a way that makes sense for autocomplete. So we
    # - Only include package if we think the user entered a package
    # - Exclude build number, since it's not used as part of filtering
    # When we don't include package, this can result in duplicate version numbers, so we
    # also de-dupe here. This can result in less than 1000 versions returned, but we
    # typically use very few values so this works ok.
    for version in versions:
        formatted_version = version if include_package else version.split("@", 1)[1]
        formatted_version = formatted_version.split("+", 1)[0]
        if formatted_version in seen:
            continue

        seen.add(formatted_version)
        formatted_versions.append(formatted_version)

    return SequencePaginator(
        [
            (i, TagValue(SEMVER_ALIAS, v, None, None, None))
            for i, v in enumerate(formatted_versions)
        ]
    )
from . import models from sentry.tagstore.types import TagKey, TagValue, GroupTagKey, GroupTagValue from sentry.tasks.post_process import index_event_tags transformers = { models.TagKey: lambda instance: TagKey( key=instance.key, values_seen=instance.values_seen, status=instance.status, ), models.TagValue: lambda instance: TagValue( key=instance.key, value=instance.value, times_seen=instance.times_seen, first_seen=instance.first_seen, last_seen=instance.last_seen, ), models.GroupTagKey: lambda instance: GroupTagKey( group_id=instance.group_id, key=instance.key, values_seen=instance.values_seen, ), models.GroupTagValue: lambda instance: GroupTagValue( group_id=instance.group_id, key=instance.key, value=instance.value, times_seen=instance.times_seen, first_seen=instance.first_seen, last_seen=instance.last_seen,
def get_tag_value_paginator_for_projects(
    self,
    projects,
    environments,
    key,
    start=None,
    end=None,
    query=None,
    order_by="-last_seen",
    include_transactions=False,
):
    """Return a SequencePaginator of TagValue for `key` across `projects`.

    Handles several special keys (transaction_status, fuzzy numeric keys,
    project alias, user display alias, error.handled/unhandled) and can
    target the Discover dataset when `include_transactions` is set. Only
    '-last_seen' ordering is supported; scores are in milliseconds.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    dataset = Dataset.Events
    snuba_key = snuba.get_snuba_column_name(key)
    if include_transactions and snuba_key.startswith("tags["):
        # Not a real events column; see if Discover knows it as one.
        snuba_key = snuba.get_snuba_column_name(key, dataset=Dataset.Discover)
        if not snuba_key.startswith("tags["):
            dataset = Dataset.Discover

    # We cannot search the values of these columns like we do other columns because they are
    # a different type, and as such, LIKE and != do not work on them. Furthermore, because the
    # use case for these values in autosuggestion is minimal, we choose to disable them here.
    #
    # event_id:   This is a FixedString which disallows us to use LIKE on it when searching,
    #             but does work with !=. However, for consistency's sake we disallow it
    #             entirely; furthermore, suggesting an event_id is not a very useful feature
    #             as they are not human readable.
    # timestamp:  This is a DateTime which disallows us to use both LIKE and != on it when
    #             searching. Suggesting a timestamp can potentially be useful but as it does
    #             not work at all, we opt to disable it here. A potential solution can be to
    #             generate a time range to bound where they are searching. e.g. if a user
    #             enters 2020-07 we can generate the following conditions:
    #             >= 2020-07-01T00:00:00 AND <= 2020-07-31T23:59:59
    # time:       This is a column computed from timestamp so it suffers the same issues
    if snuba_key in {"event_id", "timestamp", "time"}:
        return SequencePaginator([])

    # These columns have fixed values and we don't need to emit queries to find out the
    # potential options.
    if key in {"error.handled", "error.unhandled"}:
        return SequencePaginator(
            [
                (
                    1,
                    TagValue(
                        key=key, value="true", times_seen=None, first_seen=None, last_seen=None
                    ),
                ),
                (
                    2,
                    TagValue(
                        key=key, value="false", times_seen=None, first_seen=None, last_seen=None
                    ),
                ),
            ]
        )

    conditions = []
    # transaction status needs a special case so that the user interacts with the names and not codes
    transaction_status = snuba_key == "transaction_status"
    if include_transactions and transaction_status:
        # Here we want to use the status codes during filtering,
        # but want to do this with names that include our query
        status_codes = [
            span_key
            for span_key, value in six.iteritems(SPAN_STATUS_CODE_TO_NAME)
            if (query and query in value) or (not query)
        ]
        if status_codes:
            conditions.append([snuba_key, "IN", status_codes])
        else:
            # No status name matches the query; nothing can come back.
            return SequencePaginator([])
    elif key in FUZZY_NUMERIC_KEYS:
        # Numeric keys match a +/- FUZZY_NUMERIC_DISTANCE window around the query.
        converted_query = int(query) if query is not None and query.isdigit() else None
        if converted_query is not None:
            conditions.append([snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE])
            conditions.append([snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE])
    elif include_transactions and key == PROJECT_ALIAS:
        # Resolve matching project slugs to ids and query by project_id.
        project_filters = {
            "id__in": projects,
        }
        if query:
            project_filters["slug__icontains"] = query
        project_queryset = Project.objects.filter(**project_filters).values("id", "slug")

        if not project_queryset.exists():
            return SequencePaginator([])

        project_slugs = {project["id"]: project["slug"] for project in project_queryset}
        projects = [project["id"] for project in project_queryset]
        snuba_key = "project_id"
        dataset = Dataset.Discover
    else:
        snuba_name = snuba_key

        is_user_alias = include_transactions and key == USER_DISPLAY_ALIAS
        if is_user_alias:
            # user.alias is a pseudo column in discover. It is computed by coalescing
            # together multiple user attributes. Here we get the coalesce function used,
            # and resolve it to the corresponding snuba query
            dataset = Dataset.Discover
            resolver = snuba.resolve_column(dataset)
            snuba_name = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_field()
            snuba.resolve_complex_column(snuba_name, resolver)
        elif snuba_name in BLACKLISTED_COLUMNS:
            # Column cannot be queried directly; fall back to the nested tags map.
            snuba_name = "tags[%s]" % (key,)

        if query:
            conditions.append([snuba_name, "LIKE", u"%{}%".format(query)])
        else:
            conditions.append([snuba_name, "!=", ""])

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    results = snuba.query(
        dataset=dataset,
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    if include_transactions:
        # With transaction_status we need to map the ids back to their names
        if transaction_status:
            results = OrderedDict(
                [
                    (SPAN_STATUS_CODE_TO_NAME[result_key], data)
                    for result_key, data in six.iteritems(results)
                ]
            )
        # With project names we map the ids back to the project slugs
        elif key == PROJECT_ALIAS:
            results = OrderedDict(
                [
                    (project_slugs[value], data)
                    for value, data in six.iteritems(results)
                    if value in project_slugs
                ]
            )

    tag_values = [
        TagValue(key=key, value=six.text_type(value), **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    # Score each value by its ordering field so the paginator can cursor.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc,
    )