Example #1
    def test_fail(self):
        now = datetime.now()
        with pytest.raises(snuba.SnubaError):
            snuba.query(
                start=now - timedelta(days=1),
                end=now + timedelta(days=1),
                filter_keys={'project_id': [self.project.id]},
                groupby=[")("],
            )
Example #2
    def get_group_tag_values_for_users(self, event_users, limit=100):
        start, end = self.get_time_range()
        filters = {
            'project_id': [eu.project_id for eu in event_users]
        }
        conditions = [
            ['tags[sentry:user]', 'IN', filter(None, [eu.tag_value for eu in event_users])]
        ]
        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['issue', 'user_id'], conditions, filters,
                             aggregations, orderby='-last_seen', limit=limit,
                             referrer='tagstore.get_group_tag_values_for_users')

        values = []
        for issue, users in six.iteritems(result):
            for name, data in six.iteritems(users):
                values.append(
                    GroupTagValue(
                        group_id=issue,
                        key='sentry:user',
                        value=name,
                        **fix_tag_value_data(data)
                    )
                )
        return values
Example #3
    def __get_tag_values(self, project_id, group_id, environment_id, key):
        start, end = self.get_time_range()
        tag = 'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
        }
        if group_id is not None:
            filters['issue'] = [group_id]
        conditions = [[tag, '!=', '']]
        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, [tag], conditions, filters, aggregations,
                             referrer='tagstore.__get_tag_values')

        if group_id is None:
            ctor = TagValue
        else:
            ctor = functools.partial(GroupTagValue, group_id=group_id)

        return set([ctor(key=key, value=value, **fix_tag_value_data(data))
                    for value, data in result.items()])
Example #4
    def _get_event_count():
        return snuba.query(
            start=now - timedelta(days=1),
            end=now + timedelta(days=1),
            groupby=['project_id'],
            filter_keys={'project_id': [self.project.id]},
        ).get(self.project.id, 0)
Example #5
    def __get_tag_key(self, project_id, group_id, environment_id, key):
        start, end = self.get_time_range()
        tag = u'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
        }
        if group_id is not None:
            filters['issue'] = [group_id]
        conditions = [[tag, '!=', '']]
        aggregations = [
            ['uniq', tag, 'values_seen'],
            ['count()', '', 'count']
        ]

        result = snuba.query(start, end, [], conditions, filters, aggregations,
                             referrer='tagstore.__get_tag_key')
        if result is None or result['count'] == 0:
            raise TagKeyNotFound if group_id is None else GroupTagKeyNotFound
        else:
            data = {
                'key': key,
                'values_seen': result['values_seen'],
                'count': result['count'],
            }
            if group_id is None:
                return TagKey(**data)
            else:
                return GroupTagKey(group_id=group_id, **data)
Example #6
    def __get_tag_value(self, project_id, group_id, environment_id, key, value):
        start, end = self.get_time_range()
        tag = u'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
        }
        if group_id is not None:
            filters['issue'] = [group_id]
        conditions = [[tag, '=', value]]
        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        data = snuba.query(start, end, [], conditions, filters, aggregations,
                           referrer='tagstore.__get_tag_value')
        if not data['times_seen'] > 0:
            raise TagValueNotFound if group_id is None else GroupTagValueNotFound
        else:
            data.update({
                'key': key,
                'value': value,
            })
            if group_id is None:
                return TagValue(**fix_tag_value_data(data))
            else:
                return GroupTagValue(group_id=group_id, **fix_tag_value_data(data))
Example #7
    def __get_tag_keys(self, project_id, group_id, environment_id, limit=1000):
        start, end = self.get_time_range()
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
        }
        if group_id is not None:
            filters['issue'] = [group_id]
        aggregations = [
            ['uniq', 'tags_value', 'values_seen'],
            ['count()', '', 'count']
        ]

        # TODO should this be sorted by count() descending, rather than the
        # number of unique values
        result = snuba.query(start, end, ['tags_key'], [], filters,
                             aggregations, limit=limit, orderby='-values_seen',
                             referrer='tagstore.__get_tag_keys')

        if group_id is None:
            ctor = TagKey
        else:
            ctor = functools.partial(GroupTagKey, group_id=group_id)

        return set([
            ctor(
                key=key,
                values_seen=data['values_seen'],
                count=data['count'],
            ) for key, data in six.iteritems(result) if data['values_seen']
        ])
Example #8
    def get_group_seen_values_for_environments(self, project_ids, group_id_list, environment_ids,
                                               start=None, end=None):
        # Get the total times seen, first seen, and last seen across multiple environments
        if start is None or end is None:
            start, end = self.get_time_range()
        filters = {
            'project_id': project_ids,
            'issue': group_id_list,
        }
        conditions = None
        if environment_ids:
            filters['environment'] = environment_ids

        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['issue'], conditions, filters, aggregations,
                             referrer='tagstore.get_group_seen_values_for_environments')

        return {
            issue: fix_tag_value_data(data) for issue, data in six.iteritems(result)
        }
Example #9
    def get_group_tag_value(self, project_id, group_id, environment_id, key, value):
        from sentry.tagstore.exceptions import GroupTagValueNotFound
        start, end = self.get_time_range()
        tag = 'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
        }
        if group_id is not None:
            filters['issue'] = [group_id]
        conditions = [
            [tag, '=', value]
        ]
        aggregations = [['count()', '', 'count']]

        result = snuba.query(start, end, [], conditions, filters, aggregations)

        if result == 0:
            raise GroupTagValueNotFound
        else:
            return ObjectWrapper({
                'times_seen': result,
                'key': key,
                'value': value,
                'group_id': group_id,
            })
Example #10
    def test(self):
        "This is just a simple 'hello, world' example test."

        now = datetime.now()

        events = [{
            'event_id': 'x' * 32,
            'primary_hash': '1' * 32,
            'group_id': 1,
            'project_id': self.project.id,
            'message': 'message',
            'platform': 'python',
            'datetime': now.strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': time.mktime(now.timetuple()),
            }
        }]

        self.snuba_insert(events)

        assert snuba.query(
            start=now - timedelta(days=1),
            end=now + timedelta(days=1),
            groupby=['project_id'],
            filter_keys={'project_id': [self.project.id]},
        ) == {self.project.id: 1}
Example #11
    def get_release_tags(self, project_ids, environment_id, versions):
        start, end = self.get_time_range()
        filters = {
            'project_id': project_ids,
            'environment': [environment_id],
        }
        # NB we add release as a condition rather than a filter because
        # this method is already dealing with version strings rather than
        # release ids which would need to be translated by the snuba util.
        tag = 'sentry:release'
        col = u'tags[{}]'.format(tag)
        conditions = [[col, 'IN', versions]]
        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['project_id', col],
                             conditions, filters, aggregations,
                             referrer='tagstore.get_release_tags')

        values = []
        for project_data in six.itervalues(result):
            for value, data in six.iteritems(project_data):
                values.append(
                    TagValue(
                        key=tag,
                        value=value,
                        **fix_tag_value_data(data)
                    )
                )

        return set(values)
Example #12
    def get_group_tag_values_for_users(self, event_users, limit=100):
        start, end = self.get_time_range()
        filters = {
            'project_id': [eu.project_id for eu in event_users]
        }
        or_conditions = [cond for cond in [
            ('user_id', 'IN', [eu.ident for eu in event_users if eu.ident]),
            ('email', 'IN', [eu.email for eu in event_users if eu.email]),
            ('username', 'IN', [eu.username for eu in event_users if eu.username]),
            ('ip_address', 'IN', [eu.ip_address for eu in event_users if eu.ip_address]),
        ] if cond[2] != []]
        conditions = [or_conditions]
        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['user_id'], conditions, filters,
                             aggregations, orderby='-last_seen', limit=limit)

        return [ObjectWrapper({
            'times_seen': val['count'],
            'first_seen': val['first_seen'],
            'last_seen': val['last_seen'],
            'key': 'sentry:user',
            'value': name,
        }) for name, val in six.iteritems(result)]
Example #13
    def get_group_tag_value_iter(self, project_id, group_id, environment_id, key, callbacks=()):
        start, end = self.get_time_range()
        results = snuba.query(
            start=start,
            end=end,
            groupby=['tags_value'],
            filter_keys={
                'project_id': [project_id],
                'environment': [environment_id],
                'tags_key': [key],
                'issue': [group_id],
            },
            aggregations=[
                ['count()', '', 'times_seen'],
                ['min', 'timestamp', 'first_seen'],
                ['max', 'timestamp', 'last_seen'],
            ],
            orderby='-first_seen',  # Closest thing to pre-existing `-id` order
            # TODO: This means they can't actually iterate all GroupTagValues.
            limit=1000,
        )

        group_tag_values = [
            GroupTagValue(
                group_id=group_id,
                key=key,
                value=value,
                **fix_tag_value_data(data)
            ) for value, data in six.iteritems(results)
        ]

        for cb in callbacks:
            cb(group_tag_values)

        return group_tag_values
Example #14
    def get_group_tag_keys_and_top_values(self, project_id, group_id, environment_id, user=None):
        from sentry import tagstore
        start, end = self.get_time_range()
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': [group_id],
        }
        aggregations = [
            ['count()', '', 'count'],
            ['topK(10)', 'tags_value', 'top'],
            ['uniq', 'tags_value', 'uniq'],
        ]
        conditions = [
            ['tags_value', 'IS NOT NULL', None],
        ]
        results = snuba.query(start, end, ['tags_key'], conditions, filters, aggregations)

        return [{
            'id': key,
            'name': tagstore.get_tag_key_label(key),
            'key': tagstore.get_standardized_key(key),
            'uniqueValues': res['uniq'],
            'totalValues': res['count'],
            'topValues': [{
                'id': val,
                'name': tagstore.get_tag_value_label(key, val),
                'key': tagstore.get_standardized_key(key),
                'value': val,
            } for val in res['top']],
        } for key, res in six.iteritems(results)]
Example #15
    def get_release_tags(self, project_ids, environment_id, versions):
        start, end = self.get_time_range()
        filters = {
            'project_id': project_ids,
            'environment': [environment_id],
        }
        # NB we add release as a condition rather than a filter because
        # this method is already dealing with version strings rather than
        # release ids which would need to be translated by the snuba util.
        conditions = [['release', 'IN', versions]]
        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['release'], conditions, filters, aggregations)

        return [ObjectWrapper({
            'times_seen': val['count'],
            'first_seen': val['first_seen'],
            'last_seen': val['last_seen'],
            'key': 'release',
            'value': name,
        }) for name, val in six.iteritems(result)]
Example #16
    def get_top_group_tag_values(self, project_id, group_id, environment_id, key, limit=3):
        start, end = self.get_time_range()
        tag = 'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': [group_id],
        }
        conditions = [[tag, '!=', '']]
        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, [tag], conditions, filters,
                             aggregations, limit=limit, orderby='-count')

        return [ObjectWrapper({
            'times_seen': val['count'],
            'first_seen': val['first_seen'],
            'last_seen': val['last_seen'],
            'key': key,
            'value': name,
            'group_id': group_id,
        }) for name, val in six.iteritems(result)]
Example #17
    def get_top_group_tag_values(self, project_id, group_id, environment_id, key, limit=3):
        start, end = self.get_time_range()
        tag = 'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': [group_id],
        }
        conditions = [[tag, '!=', '']]
        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, [tag], conditions, filters,
                             aggregations, limit=limit, orderby='-times_seen',
                             referrer='tagstore.get_top_group_tag_values')
        return [
            GroupTagValue(
                group_id=group_id,
                key=key,
                value=value,
                **fix_tag_value_data(data)
            ) for value, data in six.iteritems(result)
        ]
Example #18
    def get_group_list_tag_value(self, project_id, group_ids, environment_id, key, value):
        start, end = self.get_time_range()
        tag = 'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': group_ids,
        }
        conditions = [
            [tag, '=', value]
        ]
        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['issue'], conditions, filters, aggregations)

        return {
            issue: ObjectWrapper({
                'times_seen': val['count'],
                'first_seen': val['first_seen'],
                'last_seen': val['last_seen'],
                'key': key,
                'value': value,
                'group_id': issue,
            }) for issue, val in six.iteritems(result)}
Example #19
    def test_use_group_id(self):
        base_time = datetime.utcnow()
        group = self.create_group()
        self._insert_event_for_time(base_time, group_id=group.id)

        with self.options({'snuba.use_group_id_column': True}):
            # verify filter_keys and aggregation
            assert snuba.query(
                start=base_time - timedelta(days=1),
                end=base_time + timedelta(days=1),
                groupby=['issue'],
                filter_keys={
                    'project_id': [self.project.id],
                    'issue': [group.id]
                },
            ) == {group.id: 1}

            # verify raw_query selecting issue row
            assert snuba.raw_query(
                start=base_time - timedelta(days=1),
                end=base_time + timedelta(days=1),
                selected_columns=['issue', 'timestamp'],
                filter_keys={
                    'project_id': [self.project.id],
                    'issue': [group.id]
                },
            )['data'] == [{
                'issue': group.id,
                'timestamp': base_time.strftime('%Y-%m-%dT%H:%M:%S+00:00'),
            }]
Example #20
    def get_group_list_tag_value(self, project_id, group_id_list, environment_id, key, value):
        start, end = self.get_time_range()
        tag = u'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': group_id_list,
        }
        conditions = [
            [tag, '=', value]
        ]
        aggregations = [
            ['count()', '', 'times_seen'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start, end, ['issue'], conditions, filters, aggregations,
                             referrer='tagstore.get_group_list_tag_value')

        return {
            issue: GroupTagValue(
                group_id=issue,
                key=key,
                value=value,
                **fix_tag_value_data(data)
            ) for issue, data in six.iteritems(result)
        }
Example #21
    def get_tag_value_paginator_for_projects(self, projects, environments, key, start, end,
                                             query=None, order_by='-last_seen'):
        from sentry.api.paginator import SequencePaginator

        if not order_by == '-last_seen':
            raise ValueError("Unsupported order_by: %s" % order_by)

        snuba_key = snuba.get_snuba_column_name(key)

        conditions = []

        if snuba_key in BLACKLISTED_COLUMNS:
            snuba_key = 'tags[%s]' % (key,)

        if query:
            conditions.append([snuba_key, 'LIKE', u'%{}%'.format(query)])
        else:
            conditions.append([snuba_key, '!=', ''])

        filters = {
            'project_id': projects,
        }
        if environments:
            filters['environment'] = environments

        results = snuba.query(
            start=start,
            end=end,
            groupby=[snuba_key],
            filter_keys=filters,
            aggregations=[
                ['count()', '', 'times_seen'],
                ['min', 'timestamp', 'first_seen'],
                ['max', 'timestamp', 'last_seen'],
            ],
            conditions=conditions,
            orderby=order_by,
            # TODO: This means they can't actually paginate all TagValues.
            limit=1000,
            arrayjoin=snuba.get_arrayjoin(snuba_key),
            referrer='tagstore.get_tag_value_paginator_for_projects',
        )

        tag_values = [
            TagValue(
                key=key,
                value=value,
                **fix_tag_value_data(data)
            ) for value, data in six.iteritems(results)
        ]

        desc = order_by.startswith('-')
        score_field = order_by.lstrip('-')
        return SequencePaginator(
            [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
            reverse=desc
        )
Example #22
    def _query_for_issue(group_id):
        return snuba.query(
            start=base_time - timedelta(days=1),
            end=base_time + timedelta(days=1),
            groupby=['issue'],
            filter_keys={
                'project_id': [self.project.id],
                'issue': [group_id]
            },
        )
Example #23
    def _get_event_count():
        # attempt to query back 90 days
        return snuba.query(
            start=base_time - timedelta(days=90),
            end=base_time + timedelta(days=1),
            groupby=['project_id'],
            filter_keys={
                'project_id': [self.project.id],
            },
        )
Example #24
    def get_group_tag_keys_and_top_values(
            self, project_id, group_id, environment_ids, user=None, keys=None, value_limit=TOP_VALUES_DEFAULT_LIMIT):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * value_limit.
        start, end = self.get_time_range()

        # First get totals and unique counts by key.
        keys_with_counts = self.get_group_tag_keys(project_id, group_id, environment_ids, keys=keys)

        # Then get the top values with first_seen/last_seen/count for each
        filters = {
            'project_id': [project_id],
        }
        if environment_ids:
            filters['environment'] = environment_ids
        if keys is not None:
            filters['tags_key'] = keys
        if group_id is not None:
            filters['issue'] = [group_id]

        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]
        conditions = [['tags_key', 'NOT IN', self.EXCLUDE_TAG_KEYS]]

        values_by_key = snuba.query(
            start, end, ['tags_key', 'tags_value'], conditions, filters, aggregations,
            orderby='-count', limitby=[value_limit, 'tags_key'],
            referrer='tagstore.__get_tag_keys_and_top_values'
        )

        # Then supplement the key objects with the top values for each.
        if group_id is None:
            value_ctor = TagValue
        else:
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        for keyobj in keys_with_counts:
            key = keyobj.key
            values = values_by_key.get(key, [])
            keyobj.top_values = [
                value_ctor(
                    key=keyobj.key,
                    value=value,
                    times_seen=data['count'],
                    first_seen=parse_datetime(data['first_seen']),
                    last_seen=parse_datetime(data['last_seen']),
                ) for value, data in six.iteritems(values)
            ]

        return keys_with_counts
Example #25
    def get_data(self, model, keys, start, end, rollup=None, environment_ids=None,
                 aggregation='count()', group_on_model=True, group_on_time=False):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        model_columns = self.model_columns.get(model)

        if model_columns is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group, model_aggregate = model_columns

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append('time')
        if aggregation == 'count()' and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
        keys_map = {k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None}
        if environment_ids is not None:
            keys_map['environment'] = environment_ids

        aggregations = [[aggregation, model_aggregate, 'aggregate']]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)

        if keys:
            result = snuba.query(start, end, groupby, None, keys_map,
                                 aggregations, rollup, referrer='tsdb',
                                 is_grouprelease=(model == TSDBModel.frequent_releases_by_group))
        else:
            result = {}

        if group_on_time:
            keys_map['time'] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
Example #26
    def get_groups_user_counts(self, project_id, group_ids, environment_id):
        start, end = self.get_time_range()
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': group_ids,
        }
        aggregations = [['uniq', 'user_id', 'count']]

        result = snuba.query(start, end, ['issue'], None, filters, aggregations)
        return defaultdict(int, result.items())
Example #27
    def get_group_tag_value_count(self, project_id, group_id, environment_id, key):
        start, end = self.get_time_range()
        tag = 'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': [group_id],
        }
        conditions = [[tag, '!=', '']]
        aggregations = [['count()', '', 'count']]

        return snuba.query(start, end, [], conditions, filters, aggregations)
Example #28
    def get_groups_user_counts(self, project_id, group_ids, environment_id):
        start, end = self.get_time_range()
        filters = {
            'project_id': [project_id],
            'environment': [environment_id],
            'issue': group_ids,
        }
        aggregations = [['uniq', 'tags[sentry:user]', 'count']]

        result = snuba.query(start, end, ['issue'], None, filters, aggregations,
                             referrer='tagstore.get_groups_user_counts')
        return defaultdict(int, {k: v for k, v in result.items() if v})
Example #29
    def test_organization_retention_larger_than_end_date(self):
        base_time = datetime.utcnow()

        with self.options({'system.event-retention-days': 1}):
            assert snuba.query(
                start=base_time - timedelta(days=90),
                end=base_time - timedelta(days=60),
                groupby=['project_id'],
                filter_keys={
                    'project_id': [self.project.id],
                },
            ) == {}
Example #30
    def __get_tag_key_and_top_values(self, project_id, group_id, environment_id,
                                     key, limit=3, raise_on_empty=True):
        start, end = self.get_time_range()
        tag = u'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
        }
        if environment_id:
            filters['environment'] = [environment_id]
        if group_id is not None:
            filters['issue'] = [group_id]
        conditions = [[tag, '!=', '']]
        aggregations = [
            ['uniq', tag, 'values_seen'],
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result, totals = snuba.query(
            start, end, [tag], conditions, filters, aggregations,
            orderby='-count', limit=limit, totals=True,
            referrer='tagstore.__get_tag_key_and_top_values'
        )

        if raise_on_empty and (not result or totals.get('count', 0) == 0):
            raise TagKeyNotFound if group_id is None else GroupTagKeyNotFound
        else:
            if group_id is None:
                key_ctor = TagKey
                value_ctor = TagValue
            else:
                key_ctor = functools.partial(GroupTagKey, group_id=group_id)
                value_ctor = functools.partial(GroupTagValue, group_id=group_id)

            top_values = [
                value_ctor(
                    key=key,
                    value=value,
                    times_seen=data['count'],
                    first_seen=parse_datetime(data['first_seen']),
                    last_seen=parse_datetime(data['last_seen']),
                ) for value, data in six.iteritems(result)
            ]

            return key_ctor(
                key=key,
                values_seen=totals.get('values_seen', 0),
                count=totals.get('count', 0),
                top_values=top_values
            )
Example #31
    def get_group_tag_keys_and_top_values(
            self,
            project_id,
            group_id,
            environment_id,
            user=None,
            keys=None,
            value_limit=TOP_VALUES_DEFAULT_LIMIT):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * value_limit. We also can't use `totals` here to get the number
        # of "other" values for each key as we only get a single total back,
        # which will be the total count across all keys.
        start, end = self.get_time_range()
        filters = {
            'project_id': [project_id],
        }
        if environment_id:
            filters['environment'] = [environment_id]
        if keys is not None:
            filters['tags_key'] = keys
        if group_id is not None:
            filters['issue'] = [group_id]

        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result = snuba.query(start,
                             end, ['tags_key', 'tags_value'],
                             None,
                             filters,
                             aggregations,
                             orderby='-count',
                             limitby=[value_limit, 'tags_key'],
                             referrer='tagstore.__get_tag_keys_and_top_values')

        if group_id is None:
            key_ctor = TagKey
            value_ctor = TagValue
        else:
            key_ctor = functools.partial(GroupTagKey, group_id=group_id)
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        return set([
            key_ctor(

                # TODO we don't know these from the current query, but in the
                # context of this method, the client usually knows these values
                # from the result of a previous call to get_group_tag_keys, so
                # we could fill them in here with another query, but also it
                # could be a waste of time.
                values_seen=0,
                count=0,
                key=key,
                top_values=[
                    value_ctor(
                        key=key,
                        value=value,
                        times_seen=data['count'],
                        first_seen=parse_datetime(data['first_seen']),
                        last_seen=parse_datetime(data['last_seen']),
                    ) for value, data in six.iteritems(values)
                ]) for key, values in six.iteritems(result)
        ])
Example #32
    def _query(self, projects, retention_window_start, group_queryset, tags,
               environment, sort_by, limit, cursor, count_hits,
               paginator_options, **parameters):

        # TODO: Product decision: we currently search Group.message to handle
        # the `query` parameter, because that's what we've always done. We could
        # do that search against every event in Snuba instead, but results may
        # differ.

        # TODO: It's possible `first_release` could be handled by Snuba.
        if environment is not None:
            group_queryset = ds.QuerySetBuilder({
                'first_release':
                ds.CallbackCondition(
                    lambda queryset, version: queryset.extra(
                        where=[
                            '{} = {}'.format(
                                ds.get_sql_column(GroupEnvironment,
                                                  'first_release_id'),
                                ds.get_sql_column(Release, 'id'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'organization'), ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'version'), ),
                        ],
                        params=[projects[0].organization_id, version],
                        tables=[Release._meta.db_table],
                    ),
                ),
            }).build(
                group_queryset.extra(
                    where=[
                        u'{} = {}'.format(
                            ds.get_sql_column(Group, 'id'),
                            ds.get_sql_column(GroupEnvironment, 'group_id'),
                        ),
                        u'{} = %s'.format(
                            ds.get_sql_column(GroupEnvironment,
                                              'environment_id'), ),
                    ],
                    params=[environment.id],
                    tables=[GroupEnvironment._meta.db_table],
                ),
                parameters,
            )
        else:
            group_queryset = ds.QuerySetBuilder({
                'first_release':
                ds.CallbackCondition(
                    lambda queryset, version: queryset.filter(
                        first_release__organization_id=projects[0].
                        organization_id,
                        first_release__version=version,
                    ),
                ),
            }).build(
                group_queryset,
                parameters,
            )

        now = timezone.now()
        end = parameters.get('date_to')
        if not end:
            end = now + ALLOWED_FUTURE_DELTA

            # This search is for some time window that ends with "now",
            # so if the requested sort is `date` (`last_seen`) and there
            # are no other Snuba-based search predicates, we can simply
            # return the results from Postgres.
            if cursor is None \
                    and sort_by == 'date' \
                    and not tags \
                    and not environment \
                    and not any(param in parameters for param in [
                        'age_from', 'age_to', 'last_seen_from',
                        'last_seen_to', 'times_seen', 'times_seen_lower',
                        'times_seen_upper'
                    ]):
                group_queryset = group_queryset.order_by('-last_seen')
                paginator = DateTimePaginator(group_queryset, '-last_seen',
                                              **paginator_options)
                return paginator.get_result(limit,
                                            cursor,
                                            count_hits=count_hits)

        # TODO: Presumably we only want to search back to the project's max
        # retention date, which may be closer than 90 days in the past, but
        # apparently `retention_window_start` can be None(?), so we need a
        # fallback.
        retention_date = max(
            filter(None, [retention_window_start, now - timedelta(days=90)]))

        start = max(
            filter(None, [
                retention_date,
                parameters.get('date_from'),
            ]))

        end = max([retention_date, end])

        if start == retention_date and end == retention_date:
            # Both `start` and `end` must have been trimmed to `retention_date`,
            # so this entire search was against a time range that is outside of
            # retention. We'll return empty results to maintain backwards compatibility
            # with Django search (for now).
            return EMPTY_RESULT

        if start >= end:
            # TODO: This maintains backwards compatibility with Django search, but
            # in the future we should find a way to notify the user that their search
            # is invalid.
            return EMPTY_RESULT

        # num_candidates is the number of Group IDs to send down to Snuba. If
        # more Group ID candidates are found, a "bare" Snuba search is performed
        # and the result groups are then post-filtered via queries to the Sentry DB.
        optimizer_enabled = options.get(
            'snuba.search.pre-snuba-candidates-optimizer')
        if optimizer_enabled:
            missed_projects = []
            keys = [self._get_project_count_cache_key(p.id) for p in projects]

            counts_by_projects = {
                self._get_project_id_from_key(key): count
                for key, count in cache.get_many(keys).items()
            }

            missed_projects = {p.id
                               for p in projects} - set(
                                   counts_by_projects.keys())

            if missed_projects:
                missing_counts = snuba.query(
                    start=max(
                        filter(
                            None,
                            [retention_window_start, now - timedelta(days=90)
                             ])),
                    end=now,
                    groupby=['project_id'],
                    filter_keys={
                        'project_id': list(missed_projects),
                    },
                    aggregations=[['uniq', 'group_id', 'group_count']],
                    referrer='search',
                )

                cache.set_many(
                    {
                        self._get_project_count_cache_key(project_id): count
                        for project_id, count in missing_counts.items()
                    },
                    options.get('snuba.search.project-group-count-cache-time'))

                counts_by_projects.update(missing_counts)

            min_candidates = options.get(
                'snuba.search.min-pre-snuba-candidates')
            max_candidates = options.get(
                'snuba.search.max-pre-snuba-candidates')
            candidates_percentage = options.get(
                'snuba.search.pre-snuba-candidates-percentage')

            num_candidates = max(
                min_candidates,
                min(max_candidates,
                    sum(counts_by_projects.values()) * candidates_percentage))
        else:
            num_candidates = options.get(
                'snuba.search.min-pre-snuba-candidates')

        # pre-filter query
        candidate_ids = None
        if num_candidates and limit <= num_candidates:
            candidate_ids = list(
                group_queryset.values_list('id',
                                           flat=True)[:num_candidates + 1])
            metrics.timing('snuba.search.num_candidates', len(candidate_ids))

            if not candidate_ids:
                # no matches could possibly be found from this point on
                metrics.incr('snuba.search.no_candidates')
                return EMPTY_RESULT
            elif len(candidate_ids) > num_candidates:
                # If the pre-filter query didn't include anything to significantly
                # filter down the number of results (from 'first_release', 'query',
                # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
                # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
                # might have surpassed the `num_candidates`. In this case,
                # we *don't* want to pass candidates down to Snuba, and instead we
                # want Snuba to do all the filtering/sorting it can and *then* apply
                # this queryset to the results from Snuba, which we call
                # post-filtering.
                metrics.incr('snuba.search.too_many_candidates')
                candidate_ids = None

        sort_field = sort_strategies[sort_by]
        chunk_growth = options.get('snuba.search.chunk-growth-rate')
        max_chunk_size = options.get('snuba.search.max-chunk-size')
        chunk_limit = limit
        offset = 0
        num_chunks = 0

        paginator_results = EMPTY_RESULT
        result_groups = []
        result_group_ids = set()

        max_time = options.get('snuba.search.max-total-chunk-time-seconds')
        time_start = time.time()

        # Do smaller searches in chunks until we have enough results
        # to answer the query (or hit the end of possible results). We do
        # this because a common case for search is to return 100 groups
        # sorted by `last_seen`, and we want to avoid returning all of
        # a project's groups and then post-sorting them all in Postgres
        # when typically the first N results will do.
        while (time.time() - time_start) < max_time:
            num_chunks += 1

            # grow the chunk size on each iteration to account for huge projects
            # and weird queries, up to a max size
            chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
            # but if we have candidate_ids always query for at least that many items
            chunk_limit = max(chunk_limit,
                              len(candidate_ids) if candidate_ids else 0)

            # {group_id: group_score, ...}
            snuba_groups, more_results = snuba_search(
                start=start,
                end=end,
                project_ids=[p.id for p in projects],
                environment_id=environment and environment.id,
                tags=tags,
                sort_field=sort_field,
                cursor=cursor,
                candidate_ids=candidate_ids,
                limit=chunk_limit,
                offset=offset,
                **parameters)
            metrics.timing('snuba.search.num_snuba_results', len(snuba_groups))
            offset += len(snuba_groups)

            if not snuba_groups:
                break

            if candidate_ids:
                # pre-filtered candidates were passed down to Snuba,
                # so we're finished with filtering and these are the
                # only results
                result_groups = snuba_groups
            else:
                # pre-filtered candidates were *not* passed down to Snuba,
                # so we need to do post-filtering to verify Sentry DB predicates
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid
                            for gid, _ in snuba_groups]).values_list('id',
                                                                     flat=True)

                group_to_score = dict(snuba_groups)
                for group_id in filtered_group_ids:
                    if group_id in result_group_ids:
                        # because we're doing multiple Snuba queries, which
                        # happen outside of a transaction, there is a small possibility
                        # of groups moving around in the sort scoring underneath us,
                        # so we at least want to protect against duplicates
                        continue

                    group_score = group_to_score[group_id]
                    result_group_ids.add(group_id)
                    result_groups.append((group_id, group_score))

            paginator_results = SequencePaginator(
                [(score, id) for (id, score) in result_groups],
                reverse=True,
                **paginator_options).get_result(limit,
                                                cursor,
                                                count_hits=False)

            # break the query loop for one of three reasons:
            # * we started with Postgres candidates and so only do one Snuba query max
            # * the paginator is returning enough results to satisfy the query (>= the limit)
            # * there are no more groups in Snuba to post-filter
            if candidate_ids \
                    or len(paginator_results.results) >= limit \
                    or not more_results:
                break

        # HACK: We're using the SequencePaginator to mask the complexities of going
        # back and forth between two databases. This causes a problem with pagination
        # because we're 'lying' to the SequencePaginator (it thinks it has the entire
        # result set in memory when it does not). For this reason we need to make some
        # best guesses as to whether the `prev` and `next` cursors have more results.
        if len(paginator_results.results) == limit and more_results:
            # Because we are going back and forth between DBs there is a small
            # chance that we will hand the SequencePaginator exactly `limit`
            # items. In this case the paginator will assume there are no more
            # results, so we need to override the `next` cursor's results.
            paginator_results.next.has_results = True

        if cursor is not None and (not cursor.is_prev
                                   or len(paginator_results.results) > 0):
            # If the user passed a cursor, and it isn't already a 0 result `is_prev`
            # cursor, then it's worth allowing them to go back a page to check for
            # more results.
            paginator_results.prev.has_results = True

        metrics.timing('snuba.search.num_chunks', num_chunks)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [
            groups[k] for k in paginator_results.results if k in groups
        ]

        return paginator_results
Example #33
def do_search(project_id, environment_id, tags, start, end,
              sort, candidates=None, limit=1000, **parameters):
    from sentry.search.base import ANY

    filters = {
        'project_id': [project_id],
    }

    if environment_id is not None:
        filters['environment'] = [environment_id]

    if candidates is not None:
        hashes = list(
            GroupHash.objects.filter(
                group_id__in=candidates
            ).values_list(
                'hash', flat=True
            ).distinct()
        )

        if not hashes:
            return {}

        filters['primary_hash'] = hashes

    having = SnubaConditionBuilder({
        'age_from': ScalarCondition('first_seen', '>'),
        'age_to': ScalarCondition('first_seen', '<'),
        'last_seen_from': ScalarCondition('last_seen', '>'),
        'last_seen_to': ScalarCondition('last_seen', '<'),
        'times_seen': CallbackCondition(
            lambda times_seen: ('times_seen', '=', times_seen),
        ),
        'times_seen_lower': ScalarCondition('times_seen', '>'),
        'times_seen_upper': ScalarCondition('times_seen', '<'),
    }).build(parameters)

    conditions = []
    for tag, val in six.iteritems(tags):
        col = 'tags[{}]'.format(tag)
        if val == ANY:
            conditions.append((col, '!=', ''))
        else:
            conditions.append((col, '=', val))

    aggregations = [
        ['count()', '', 'times_seen'],
        ['min', 'timestamp', 'first_seen'],
        ['max', 'timestamp', 'last_seen'],
        [priority_expr, '', 'priority']
    ]

    # {hash -> {times_seen -> int
    #           first_seen -> date_str,
    #           last_seen -> date_str,
    #           priority -> int},
    #  ...}
    snuba_results = snuba.query(
        start=start,
        end=end,
        groupby=['primary_hash'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=sort,
        limit=limit,
    )

    # {hash -> group_id, ...}
    hash_to_group = dict(
        GroupHash.objects.filter(
            project_id=project_id,
            hash__in=snuba_results.keys()
        ).values_list(
            'hash', 'group_id'
        )
    )

    # {group_id -> {field1: [...all values from field1 for all hashes...],
    #               field2: [...all values from field2 for all hashes...]
    #               ...}
    #  ...}
    group_data = {}
    for hash, obj in snuba_results.items():
        if hash in hash_to_group:
            group_id = hash_to_group[hash]

            if group_id not in group_data:
                group_data[group_id] = defaultdict(list)

            dest = group_data[group_id]
            for k, v in obj.items():
                dest[k].append(v)
        else:
            logger.warning(
                'search.hash_not_found',
                extra={
                    'project_id': project_id,
                    'hash': hash,
                },
            )

    return group_data
Example #34
    def get_data(self,
                 model,
                 keys,
                 start,
                 end,
                 rollup=None,
                 environment_id=None,
                 aggregation='count()',
                 group_on_model=True,
                 group_on_time=False):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        model_columns = self.model_columns.get(model)

        if model_columns is None:
            raise Exception("Unsupported TSDBModel: {}".format(model.name))

        model_group, model_aggregate = model_columns

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append('time')
        if aggregation == 'count()' and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
        keys_map = {
            k: v
            for k, v in six.iteritems(keys_map)
            if k is not None and v is not None
        }
        if environment_id is not None:
            keys_map['environment'] = [environment_id]

        aggregations = [[aggregation, model_aggregate, 'aggregate']]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)

        result = snuba.query(start,
                             end,
                             groupby,
                             None,
                             keys_map,
                             aggregations,
                             rollup,
                             referrer='tsdb')

        if group_on_time:
            keys_map['time'] = series
        self.zerofill(result, groupby, keys_map)

        return result
Example #35
    def get_tag_value_paginator_for_projects(
        self,
        projects,
        environments,
        key,
        start=None,
        end=None,
        query=None,
        order_by="-last_seen",
        include_transactions=False,
    ):
        from sentry.api.paginator import SequencePaginator

        if not order_by == "-last_seen":
            raise ValueError("Unsupported order_by: %s" % order_by)

        dataset = Dataset.Events
        if include_transactions:
            dataset = Dataset.Discover
        snuba_key = snuba.get_snuba_column_name(key, dataset=dataset)

        # We cannot search the values of these columns like we do other columns because they are
        # a different type, and as such, LIKE and != do not work on them. Furthermore, the
        # use case for these values in autosuggestion is minimal, so we choose to disable them here.
        #
        # event_id:     This is a FixedString, which prevents us from using LIKE on it when
        #               searching, but != does work. However, for consistency's sake we disallow it
        #               entirely. Furthermore, suggesting an event_id is not a very useful feature,
        #               as event IDs are not human readable.
        # timestamp:    This is a DateTime, which prevents us from using both LIKE and != on it when
        #               searching. Suggesting a timestamp could potentially be useful, but as it does
        #               not work at all, we opt to disable it here. A potential solution could be to
        #               generate a time range to bound where they are searching. e.g. if a user
        #               enters 2020-07 we can generate the following conditions:
        #               >= 2020-07-01T00:00:00 AND <= 2020-07-31T23:59:59
        # time:         This is a column computed from timestamp so it suffers the same issues
        if snuba_key in {"event_id", "timestamp", "time"}:
            return SequencePaginator([])

        # These columns have fixed values and we don't need to emit queries to find out the
        # potential options.
        if key in {"error.handled", "error.unhandled"}:
            return SequencePaginator([
                (
                    1,
                    TagValue(key=key,
                             value="true",
                             times_seen=None,
                             first_seen=None,
                             last_seen=None),
                ),
                (
                    2,
                    TagValue(key=key,
                             value="false",
                             times_seen=None,
                             first_seen=None,
                             last_seen=None),
                ),
            ])

        conditions = []
        # Transaction status needs a special case so that the user interacts with names rather than codes
        transaction_status = snuba_key == "transaction_status"
        if include_transactions and transaction_status:
            # Here we want to use the status codes during filtering,
            # but want to do this with names that include our query
            status_codes = [
                span_key
                for span_key, value in six.iteritems(SPAN_STATUS_CODE_TO_NAME)
                if (query and query in value) or (not query)
            ]
            if status_codes:
                conditions.append([snuba_key, "IN", status_codes])
            else:
                return SequencePaginator([])
        elif key in FUZZY_NUMERIC_KEYS:
            converted_query = int(
                query) if query is not None and query.isdigit() else None
            if converted_query is not None:
                conditions.append([
                    snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE
                ])
                conditions.append([
                    snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE
                ])
        elif include_transactions and key == PROJECT_ALIAS:
            project_filters = {
                "id__in": projects,
            }
            if query:
                project_filters["slug__icontains"] = query
            project_queryset = Project.objects.filter(
                **project_filters).values("id", "slug")

            if not project_queryset.exists():
                return SequencePaginator([])

            project_slugs = {
                project["id"]: project["slug"]
                for project in project_queryset
            }
            projects = [project["id"] for project in project_queryset]
            snuba_key = "project_id"
        else:
            snuba_name = snuba_key

            is_user_alias = include_transactions and key == USER_DISPLAY_ALIAS
            if is_user_alias:
                # user.alias is a pseudo column in discover. It is computed by coalescing
                # multiple user attributes. Here we get the coalesce function used and
                # resolve it to the corresponding snuba expression.
                resolver = snuba.resolve_column(dataset)
                snuba_name = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_field()
                snuba.resolve_complex_column(snuba_name, resolver)
            elif snuba_name in BLACKLISTED_COLUMNS:
                snuba_name = "tags[%s]" % (key, )

            if query:
                conditions.append([snuba_name, "LIKE", "%{}%".format(query)])
            else:
                conditions.append([snuba_name, "!=", ""])

        filters = {"project_id": projects}
        if environments:
            filters["environment"] = environments

        if dataset == Dataset.Events:
            conditions.append(DEFAULT_TYPE_CONDITION)

        results = snuba.query(
            dataset=dataset,
            start=start,
            end=end,
            groupby=[snuba_key],
            filter_keys=filters,
            aggregations=[
                ["count()", "", "times_seen"],
                ["min", "timestamp", "first_seen"],
                ["max", "timestamp", "last_seen"],
            ],
            conditions=conditions,
            orderby=order_by,
            # TODO: This means they can't actually paginate all TagValues.
            limit=1000,
            arrayjoin=snuba.get_arrayjoin(snuba_key),
            referrer="tagstore.get_tag_value_paginator_for_projects",
        )

        if include_transactions:
            # With transaction_status we need to map the ids back to their names
            if transaction_status:
                results = OrderedDict([
                    (SPAN_STATUS_CODE_TO_NAME[result_key], data)
                    for result_key, data in six.iteritems(results)
                ])
            # With project names we map the ids back to the project slugs
            elif key == PROJECT_ALIAS:
                results = OrderedDict([
                    (project_slugs[value], data)
                    for value, data in six.iteritems(results)
                    if value in project_slugs
                ])

        tag_values = [
            TagValue(key=key,
                     value=six.text_type(value),
                     **fix_tag_value_data(data))
            for value, data in six.iteritems(results)
        ]

        desc = order_by.startswith("-")
        score_field = order_by.lstrip("-")
        return SequencePaginator(
            [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv)
             for tv in tag_values],
            reverse=desc,
        )
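
As a rough illustration (not part of the code above), the "-last_seen" ordering turns each tag value's last_seen datetime into an integer millisecond score, and the paginator is built from (score, value) pairs with reverse=True. The score_for helper and the sample data below are assumptions for the sketch.

from datetime import datetime, timezone


def score_for(last_seen):
    # Same idea as int(to_timestamp(last_seen) * 1000) in the code above.
    return int(last_seen.timestamp() * 1000)


values = [
    ("production", datetime(2020, 7, 1, tzinfo=timezone.utc)),
    ("staging", datetime(2020, 7, 3, tzinfo=timezone.utc)),
]
scored = sorted(((score_for(seen), value) for value, seen in values), reverse=True)
# scored[0][1] == "staging": the most recently seen value sorts first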
Example #36
def snuba_search(project_id, environment_id, tags, start, end,
                 sort, extra_aggregations, score_fn, candidate_hashes, **parameters):
    """
    This function doesn't strictly benefit from or require being pulled out of the main
    query method above, but the query method is already large and this function at least
    extracts most of the Snuba-specific logic.

    Returns an OrderedDict of {group_id: group_score, ...} sorted descending by score.
    """

    from sentry.search.base import ANY

    filters = {
        'project_id': [project_id],
    }

    if environment_id is not None:
        filters['environment'] = [environment_id]

    if candidate_hashes is not None:
        filters['primary_hash'] = candidate_hashes.keys()

    having = SnubaConditionBuilder({
        'age_from': ScalarCondition('first_seen', '>'),
        'age_to': ScalarCondition('first_seen', '<'),
        'last_seen_from': ScalarCondition('last_seen', '>'),
        'last_seen_to': ScalarCondition('last_seen', '<'),
        'times_seen': CallbackCondition(
            lambda times_seen: ('times_seen', '=', times_seen),
        ),
        'times_seen_lower': ScalarCondition('times_seen', '>'),
        'times_seen_upper': ScalarCondition('times_seen', '<'),
    }).build(parameters)

    conditions = []
    for tag, val in six.iteritems(tags):
        col = u'tags[{}]'.format(tag)
        if val == ANY:
            conditions.append((col, '!=', ''))
        else:
            conditions.append((col, '=', val))

    required_aggregations = set([sort] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    # {hash -> {<agg_alias> -> <agg_value>,
    #           <agg_alias> -> <agg_value>,
    #           ...},
    #  ...}
    # _OR_ if there's only one <agg_alias> in use
    # {hash -> <agg_value>,
    #  ...}
    snuba_results = snuba.query(
        start=start,
        end=end,
        groupby=['primary_hash'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby='-' + sort,
        referrer='search',
    )

    # {hash -> group_id, ...}
    if candidate_hashes is not None:
        # any hash coming back had to come from our candidate set
        hash_to_group = candidate_hashes
    else:
        hash_to_group = dict(
            GroupHash.objects.filter(
                project_id=project_id,
                hash__in=snuba_results.keys()
            ).values_list(
                'hash', 'group_id'
            )
        )

    # {group_id -> {field1: [...all values from field1 for all hashes...],
    #               field2: [...all values from field2 for all hashes...]
    #               ...}
    #  ...}
    group_data = {}
    for hash, obj in snuba_results.items():
        if hash in hash_to_group:
            group_id = hash_to_group[hash]

            if group_id not in group_data:
                group_data[group_id] = defaultdict(list)

            dest = group_data[group_id]

            # NOTE: The Snuba utility code is trying to be helpful by collapsing
            # results with only one aggregate down to the single value. It's a
            # bit of a hack that we then immediately undo that work here, but
            # many other callers get value out of that functionality. If we see
            # this pattern again we should either add an option to opt-out of
            # the 'help' here or remove it from the Snuba code altogether.
            if len(required_aggregations) == 1:
                alias = list(required_aggregations)[0]
                dest[alias].append(obj)
            else:
                for k, v in obj.items():
                    dest[k].append(v)
        else:
            logger.warning(
                'search.hash_not_found',
                extra={
                    'project_id': project_id,
                    'hash': hash,
                },
            )

    return OrderedDict(
        sorted(((gid, score_fn(data))
                for gid, data in group_data.items()), key=lambda t: t[1], reverse=True)
    )
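
The NOTE in the loop above is easier to see with a small, self-contained sketch of the two result shapes; regroup and the sample data are illustrative assumptions, not Sentry code.

from collections import defaultdict


def regroup(snuba_results, hash_to_group, required_aggregations):
    # With one aggregation alias the per-hash result is a bare value; with
    # several it is a dict of alias -> value. Either way we collect lists of
    # values per alias under each group id.
    group_data = {}
    for hash_, obj in snuba_results.items():
        dest = group_data.setdefault(hash_to_group[hash_], defaultdict(list))
        if len(required_aggregations) == 1:
            (alias,) = required_aggregations
            dest[alias].append(obj)              # collapsed shape: bare value
        else:
            for alias, value in obj.items():     # expanded shape: dict
                dest[alias].append(value)
    return group_data


# Two hashes belonging to the same group, with a single aggregate in use:
regroup({"abc": 5, "def": 7}, {"abc": 1, "def": 1}, {"times_seen"})
# -> {1: defaultdict(list, {"times_seen": [5, 7]})}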
Example #37
    def __get_tag_key_and_top_values(self,
                                     project_id,
                                     group_id,
                                     environment_id,
                                     key,
                                     limit=3,
                                     raise_on_empty=True,
                                     **kwargs):

        tag = u"tags[{}]".format(key)
        filters = {"project_id": get_project_list(project_id)}
        if environment_id:
            filters["environment"] = [environment_id]
        if group_id is not None:
            filters["group_id"] = [group_id]
        conditions = kwargs.get("conditions", [])
        aggregations = kwargs.get("aggregations", [])

        conditions.append([tag, "!=", ""])
        aggregations += [
            ["uniq", tag, "values_seen"],
            ["count()", "", "count"],
            ["min", SEEN_COLUMN, "first_seen"],
            ["max", SEEN_COLUMN, "last_seen"],
        ]

        result, totals = snuba.query(
            start=kwargs.get("start"),
            end=kwargs.get("end"),
            groupby=[tag],
            conditions=conditions,
            filter_keys=filters,
            aggregations=aggregations,
            orderby="-count",
            limit=limit,
            totals=True,
            referrer="tagstore.__get_tag_key_and_top_values",
        )

        if raise_on_empty and (not result or totals.get("count", 0) == 0):
            raise TagKeyNotFound if group_id is None else GroupTagKeyNotFound
        else:
            if group_id is None:
                key_ctor = TagKey
                value_ctor = TagValue
            else:
                key_ctor = functools.partial(GroupTagKey, group_id=group_id)
                value_ctor = functools.partial(GroupTagValue,
                                               group_id=group_id)

            top_values = [
                value_ctor(
                    key=key,
                    value=value,
                    times_seen=data["count"],
                    first_seen=parse_datetime(data["first_seen"]),
                    last_seen=parse_datetime(data["last_seen"]),
                ) for value, data in six.iteritems(result)
            ]

            return key_ctor(
                key=key,
                values_seen=totals.get("values_seen", 0),
                count=totals.get("count", 0),
                top_values=top_values,
            )
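
For reference, a hedged sketch of the result shape the code above assumes when totals=True is passed: per-value aggregates keyed by tag value, plus an overall totals dict. The concrete values here are made up for illustration.

# Assumed shape of the (result, totals) pair consumed above (illustrative only):
result = {
    "Chrome": {"count": 10, "first_seen": "2018-01-01T00:00:00", "last_seen": "2018-01-02T00:00:00"},
    "Firefox": {"count": 3, "first_seen": "2018-01-01T12:00:00", "last_seen": "2018-01-01T18:00:00"},
}
totals = {"values_seen": 2, "count": 13}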
Example #38
    def get_tag_value_paginator_for_projects(self,
                                             projects,
                                             environments,
                                             key,
                                             start=None,
                                             end=None,
                                             query=None,
                                             order_by="-last_seen"):
        from sentry.api.paginator import SequencePaginator

        if order_by != "-last_seen":
            raise ValueError("Unsupported order_by: %s" % order_by)

        snuba_key = snuba.get_snuba_column_name(key)

        conditions = []

        if key in FUZZY_NUMERIC_KEYS:
            converted_query = int(
                query) if query is not None and query.isdigit() else None
            if converted_query is not None:
                conditions.append([
                    snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE
                ])
                conditions.append([
                    snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE
                ])
        else:
            if snuba_key in BLACKLISTED_COLUMNS:
                snuba_key = "tags[%s]" % (key, )

            if query:
                conditions.append([snuba_key, "LIKE", u"%{}%".format(query)])
            else:
                conditions.append([snuba_key, "!=", ""])

        filters = {"project_id": projects}
        if environments:
            filters["environment"] = environments

        results = snuba.query(
            start=start,
            end=end,
            groupby=[snuba_key],
            filter_keys=filters,
            aggregations=[
                ["count()", "", "times_seen"],
                ["min", "timestamp", "first_seen"],
                ["max", "timestamp", "last_seen"],
            ],
            conditions=conditions,
            orderby=order_by,
            # TODO: This means they can't actually paginate all TagValues.
            limit=1000,
            arrayjoin=snuba.get_arrayjoin(snuba_key),
            referrer="tagstore.get_tag_value_paginator_for_projects",
        )

        tag_values = [
            TagValue(key=key,
                     value=six.text_type(value),
                     **fix_tag_value_data(data))
            for value, data in six.iteritems(results)
        ]

        desc = order_by.startswith("-")
        score_field = order_by.lstrip("-")
        return SequencePaginator(
            [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv)
             for tv in tag_values],
            reverse=desc,
        )
Example #39
def snuba_search(start, end, project_ids, environment_id, tags, sort_field,
                 cursor, candidate_ids, limit, offset, **parameters):
    """
    This function doesn't strictly benefit from or require being pulled out of the main
    query method above, but the query method is already large and this function at least
    extracts most of the Snuba-specific logic.

    Returns a tuple of:
     * a sorted list of (group_id, group_score) tuples sorted descending by score,
     * a boolean indicating whether there are more result groups to iterate over
    """

    from sentry.search.base import ANY

    filters = {
        'project_id': project_ids,
    }

    if environment_id is not None:
        filters['environment'] = [environment_id]

    if candidate_ids is not None:
        filters['issue'] = candidate_ids

    having = SnubaConditionBuilder({
        'age_from': ScalarCondition('first_seen', '>'),
        'age_to': ScalarCondition('first_seen', '<'),
        'last_seen_from': ScalarCondition('last_seen', '>'),
        'last_seen_to': ScalarCondition('last_seen', '<'),
        'times_seen': CallbackCondition(
            lambda times_seen: ('times_seen', '=', times_seen),
        ),
        'times_seen_lower': ScalarCondition('times_seen', '>'),
        'times_seen_upper': ScalarCondition('times_seen', '<'),
    }).build(parameters)

    conditions = []
    for tag, val in six.iteritems(tags):
        col = u'tags[{}]'.format(tag)
        if val == ANY:
            conditions.append((col, '!=', ''))
        else:
            conditions.append((col, '=', val))

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append(
            (sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    # {group_id -> {<agg_alias> -> <agg_value>,
    #               <agg_alias> -> <agg_value>,
    #               ...},
    #  ...}
    # _OR_ if there's only one <agg_alias> in use
    # {group_id -> <agg_value>,
    #  ...}
    snuba_results = snuba.query(
        start=start,
        end=end,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=['-' + sort_field,
                 'issue'],  # ensure stable sort within the same score
        referrer='search',
        limit=limit + 1,
        offset=offset,
    )
    metrics.timing('snuba.search.num_result_groups', len(snuba_results.keys()))
    more_results = len(snuba_results) == limit + 1

    # {group_id -> score,
    #  ...}
    group_data = {}
    for group_id, obj in snuba_results.items():
        # NOTE: The Snuba utility code is trying to be helpful by collapsing
        # results with only one aggregate down to the single value. It's a
        # bit of a hack that we then immediately undo that work here, but
        # many other callers get value out of that functionality. If we see
        # this pattern again we should either add an option to opt-out of
        # the 'help' here or remove it from the Snuba code altogether.
        if len(required_aggregations) == 1:
            group_data[group_id] = obj
        else:
            group_data[group_id] = obj[sort_field]

    return (list(
        sorted(((gid, score) for gid, score in group_data.items()),
               key=lambda t: t[1],
               reverse=True))[:limit], more_results)
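
The limit + 1 over-fetch used above is a common way to detect whether another page exists without issuing a separate count query; here is a minimal, generic sketch of the pattern (the fetch callable is a stand-in, not a Sentry API).

def paginate(fetch, limit, offset):
    # Ask for one extra row; if we get it back, there is at least one more page.
    rows = fetch(limit=limit + 1, offset=offset)
    more_results = len(rows) == limit + 1
    return rows[:limit], more_results


rows, more = paginate(lambda limit, offset: list(range(offset, offset + limit)), 10, 0)
# more is True here because the fake fetch returned 11 rows for a limit of 10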
Example #40
    def get_data(
        self,
        model,
        keys,
        start,
        end,
        rollup=None,
        environment_ids=None,
        aggregation="count()",
        group_on_model=True,
        group_on_time=False,
        conditions=None,
    ):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        # XXX: to counteract the hack in project_key_stats.py
        if model in [
            TSDBModel.key_total_received,
            TSDBModel.key_total_blacklisted,
            TSDBModel.key_total_rejected,
        ]:
            keys = list({int(key) for key in keys})

        # 10s is the only rollup under an hour that we support
        if rollup and rollup == 10 and model in self.lower_rollup_query_settings:
            model_query_settings = self.lower_rollup_query_settings.get(model)
        else:
            model_query_settings = self.model_query_settings.get(model)

        if model_query_settings is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group = model_query_settings.groupby
        model_aggregate = model_query_settings.aggregate

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append("time")
        if aggregation == "count()" and model_aggregate is not None:
            # Special case: because count() has different semantics, we change
            # `COUNT(model_aggregate)` into `COUNT() GROUP BY model_aggregate`.
            groupby.append(model_aggregate)
            model_aggregate = None

        columns = (model_query_settings.groupby, model_query_settings.aggregate)
        keys_map = dict(zip(columns, self.flatten_keys(keys)))
        keys_map = {k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None}
        if environment_ids is not None:
            keys_map["environment"] = environment_ids

        aggregations = [[aggregation, model_aggregate, "aggregate"]]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)
        limit = min(10000, int(len(keys) * ((end - start).total_seconds() / rollup)))

        conditions = conditions if conditions is not None else []
        if model_query_settings.conditions is not None:
            # Deep-copy because snuba.query modifies the conditions it receives.
            conditions += deepcopy(model_query_settings.conditions)

        if keys:
            result = snuba.query(
                dataset=model_query_settings.dataset,
                start=start,
                end=end,
                groupby=groupby,
                conditions=conditions,
                filter_keys=keys_map,
                aggregations=aggregations,
                rollup=rollup,
                limit=limit,
                referrer="tsdb",
                is_grouprelease=(model == TSDBModel.frequent_releases_by_group),
            )
        else:
            result = {}

        if group_on_time:
            keys_map["time"] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
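
The limit computed above is an estimate of one row per (key, time bucket) pair, capped at 10000; a small standalone sketch of the arithmetic, with made-up inputs:

from datetime import datetime, timedelta

rollup = 3600                                          # one-hour buckets
start = datetime(2018, 1, 1)
end = start + timedelta(days=1)
keys = [1, 2, 3]

buckets = (end - start).total_seconds() / rollup       # 24 hourly buckets
limit = min(10000, int(len(keys) * buckets))           # at most 72 rows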
Example #41
    def get_data(
        self,
        model,
        keys,
        start,
        end,
        rollup=None,
        environment_ids=None,
        aggregation="count()",
        group_on_model=True,
        group_on_time=False,
    ):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        model_columns = self.model_columns.get(model)

        if model_columns is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group, model_aggregate = model_columns

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append("time")
        if aggregation == "count()" and model_aggregate is not None:
            # Special case: because count() has different semantics, we change
            # `COUNT(model_aggregate)` into `COUNT() GROUP BY model_aggregate`.
            groupby.append(model_aggregate)
            model_aggregate = None

        keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
        keys_map = {k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None}
        if environment_ids is not None:
            keys_map["environment"] = environment_ids

        aggregations = [[aggregation, model_aggregate, "aggregate"]]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)
        limit = min(10000, int(len(keys) * ((end - start).total_seconds() / rollup)))

        if keys:
            result = snuba.query(
                start=start,
                end=end,
                groupby=groupby,
                conditions=None,
                filter_keys=keys_map,
                aggregations=aggregations,
                rollup=rollup,
                limit=limit,
                referrer="tsdb",
                is_grouprelease=(model == TSDBModel.frequent_releases_by_group),
            )
        else:
            result = {}

        if group_on_time:
            keys_map["time"] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
Example #42
    def get_group_tag_keys_and_top_values(
            self,
            project_id,
            group_id,
            environment_ids,
            user=None,
            keys=None,
            value_limit=TOP_VALUES_DEFAULT_LIMIT):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * value_limit.
        start, end = self.get_time_range()

        # First get totals and unique counts by key.
        keys_with_counts = self.get_group_tag_keys(project_id,
                                                   group_id,
                                                   environment_ids,
                                                   keys=keys)

        # Then get the top values with first_seen/last_seen/count for each
        filters = {
            'project_id': [project_id],
        }
        if environment_ids:
            filters['environment'] = environment_ids
        if keys is not None:
            filters['tags_key'] = keys
        if group_id is not None:
            filters['issue'] = [group_id]

        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]
        conditions = [['tags_key', 'NOT IN', self.EXCLUDE_TAG_KEYS]]

        values_by_key = snuba.query(
            start,
            end, ['tags_key', 'tags_value'],
            conditions,
            filters,
            aggregations,
            orderby='-count',
            limitby=[value_limit, 'tags_key'],
            referrer='tagstore.__get_tag_keys_and_top_values')

        # Then supplement the key objects with the top values for each.
        if group_id is None:
            value_ctor = TagValue
        else:
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        for keyobj in keys_with_counts:
            key = keyobj.key
            values = values_by_key.get(key, [])
            keyobj.top_values = [
                value_ctor(
                    key=keyobj.key,
                    value=value,
                    times_seen=data['count'],
                    first_seen=parse_datetime(data['first_seen']),
                    last_seen=parse_datetime(data['last_seen']),
                ) for value, data in six.iteritems(values)
            ]

        return keys_with_counts
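
The limitby=[value_limit, 'tags_key'] argument above keeps only the top value_limit rows per tag key; the following plain-Python sketch (with a hypothetical top_values_per_key helper and made-up rows) shows the equivalent post-processing.

from collections import defaultdict


def top_values_per_key(rows, value_limit):
    # rows: iterable of (tags_key, tags_value, count) tuples
    by_key = defaultdict(list)
    for key, value, count in rows:
        by_key[key].append((count, value))
    return {
        key: [value for _, value in sorted(vals, reverse=True)[:value_limit]]
        for key, vals in by_key.items()
    }


top_values_per_key(
    [("browser", "Chrome", 10), ("browser", "Firefox", 3), ("browser", "Safari", 2)],
    value_limit=2,
)
# -> {"browser": ["Chrome", "Firefox"]}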
Example #43
    def __get_tag_keys_for_projects(self,
                                    projects,
                                    group_id,
                                    environments,
                                    start,
                                    end,
                                    limit=1000,
                                    keys=None,
                                    include_values_seen=True,
                                    use_cache=False,
                                    **kwargs):
        """ Query snuba for tag keys based on projects

            When use_cache is passed, we'll attempt to use the cache. There's an exception if group_id was passed
            which refines the query enough caching isn't required.
            The cache key is based on the filters being passed so that different queries don't hit the same cache, with
            exceptions for start and end dates. Since even a microsecond passing would result in a different caching
            key, which means always missing the cache.
            Instead, to keep the cache key the same for a short period we append the duration, and the end time rounded
            with a certain jitter to the cache key.
            This jitter is based on the hash of the key before duration/end time is added for consistency per query.
            The jitter's intent is to avoid a dogpile effect of many queries being invalidated at the same time.
            This is done by changing the rounding of the end key to a random offset. See snuba.quantize_time for
            further explanation of how that is done.
        """
        default_start, default_end = default_start_end_dates()
        if start is None:
            start = default_start
        if end is None:
            end = default_end

        filters = {"project_id": sorted(projects)}
        if environments:
            filters["environment"] = sorted(environments)
        if group_id is not None:
            filters["group_id"] = [group_id]
        if keys is not None:
            filters["tags_key"] = sorted(keys)
        aggregations = [["count()", "", "count"]]

        if include_values_seen:
            aggregations.append(["uniq", "tags_value", "values_seen"])
        conditions = []

        should_cache = use_cache and group_id is None
        result = None

        if should_cache:
            filtering_strings = [
                u"{}={}".format(key, value)
                for key, value in six.iteritems(filters)
            ]
            cache_key = u"tagstore.__get_tag_keys:{}".format(
                md5_text(*filtering_strings).hexdigest())
            key_hash = hash(cache_key)
            should_cache = (key_hash % 1000) / 1000.0 <= options.get(
                "snuba.tagstore.cache-tagkeys-rate")

        # Only continue attempting to cache if we passed the cache-rate check above
        if should_cache:
            # This needs to happen before creating the cache suffix, otherwise rounding would change the duration.
            duration = (end - start).total_seconds()
            # Because the cache suffix is built from the rounded end time, update the query end so results match the cached window.
            end = snuba.quantize_time(end, key_hash)
            cache_key += u":{}@{}".format(duration, end.isoformat())
            result = cache.get(cache_key, None)
            if result is not None:
                metrics.incr("testing.tagstore.cache_tag_key.hit")
            else:
                metrics.incr("testing.tagstore.cache_tag_key.miss")

        if result is None:
            result = snuba.query(start=start,
                                 end=end,
                                 groupby=["tags_key"],
                                 conditions=conditions,
                                 filter_keys=filters,
                                 aggregations=aggregations,
                                 limit=limit,
                                 orderby="-count",
                                 referrer="tagstore.__get_tag_keys",
                                 **kwargs)
            if should_cache:
                cache.set(cache_key, result, 300)
                metrics.incr("testing.tagstore.cache_tag_key.len",
                             amount=len(result))

        if group_id is None:
            ctor = TagKey
        else:
            ctor = functools.partial(GroupTagKey, group_id=group_id)

        results = set()
        for key, data in six.iteritems(result):
            params = {"key": key}
            if include_values_seen:
                params["values_seen"] = data["values_seen"]
                params["count"] = data["count"]
            else:
                # If only one aggregate is requested then data is just that raw
                # aggregate value, rather than a dictionary of
                # key:aggregate_value pairs
                params["count"] = data
            results.add(ctor(**params))
        return results
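
The jittered end-time rounding described in the docstring can be sketched without the real snuba.quantize_time; quantize_end below is an illustrative stand-in that shifts the rounding boundary by a per-key offset so cached queries don't all expire at once.

from datetime import datetime, timezone


def quantize_end(end, key_hash, window_seconds=600):
    # Round `end` down to a window boundary that is offset by a per-key jitter.
    jitter = key_hash % window_seconds
    epoch = int(end.timestamp())
    return datetime.fromtimestamp(epoch - ((epoch - jitter) % window_seconds), tz=timezone.utc)


end = datetime(2020, 7, 1, 12, 34, 56, tzinfo=timezone.utc)
# Two different key hashes round the same end time to different boundaries:
quantize_end(end, key_hash=11), quantize_end(end, key_hash=371)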
Example #44
    def get_group_tag_keys_and_top_values(self,
                                          project_id,
                                          group_id,
                                          environment_ids,
                                          user=None,
                                          keys=None,
                                          value_limit=TOP_VALUES_DEFAULT_LIMIT,
                                          **kwargs):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * value_limit.

        # First get totals and unique counts by key.
        keys_with_counts = self.get_group_tag_keys(project_id,
                                                   group_id,
                                                   environment_ids,
                                                   keys=keys)

        # Then get the top values with first_seen/last_seen/count for each
        filters = {"project_id": get_project_list(project_id)}
        if environment_ids:
            filters["environment"] = environment_ids
        if keys is not None:
            filters["tags_key"] = keys
        if group_id is not None:
            filters["group_id"] = [group_id]
        conditions = kwargs.get("conditions", [])
        aggregations = kwargs.get("aggregations", [])
        aggregations += [
            ["count()", "", "count"],
            ["min", SEEN_COLUMN, "first_seen"],
            ["max", SEEN_COLUMN, "last_seen"],
        ]

        values_by_key = snuba.query(
            start=kwargs.get("start"),
            end=kwargs.get("end"),
            groupby=["tags_key", "tags_value"],
            conditions=conditions,
            filter_keys=filters,
            aggregations=aggregations,
            orderby="-count",
            limitby=[value_limit, "tags_key"],
            referrer="tagstore.__get_tag_keys_and_top_values",
        )

        # Then supplement the key objects with the top values for each.
        if group_id is None:
            value_ctor = TagValue
        else:
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        for keyobj in keys_with_counts:
            key = keyobj.key
            values = values_by_key.get(key, [])
            keyobj.top_values = [
                value_ctor(
                    key=keyobj.key,
                    value=value,
                    times_seen=data["count"],
                    first_seen=parse_datetime(data["first_seen"]),
                    last_seen=parse_datetime(data["last_seen"]),
                ) for value, data in six.iteritems(values)
            ]

        return keys_with_counts
Example #45
    def get_tag_value_paginator_for_projects(self,
                                             projects,
                                             environments,
                                             key,
                                             start=None,
                                             end=None,
                                             query=None,
                                             order_by="-last_seen"):
        from sentry.api.paginator import SequencePaginator

        if order_by != "-last_seen":
            raise ValueError("Unsupported order_by: %s" % order_by)

        dataset = Dataset.Events
        snuba_key = snuba.get_snuba_column_name(key)
        if snuba_key.startswith("tags["):
            snuba_key = snuba.get_snuba_column_name(key,
                                                    dataset=Dataset.Discover)
            if not snuba_key.startswith("tags["):
                dataset = Dataset.Discover

        conditions = []

        # Transaction status needs a special case so that the user interacts with names rather than codes
        transaction_status = snuba_key == "transaction_status"
        if transaction_status:
            conditions.append([
                snuba_key,
                "IN",
                # Here we want to use the status codes during filtering,
                # but want to do this with names that include our query
                [
                    span_key for span_key, value in six.iteritems(
                        SPAN_STATUS_CODE_TO_NAME)
                    if (query and query in value) or (not query)
                ],
            ])
        elif key in FUZZY_NUMERIC_KEYS:
            converted_query = int(
                query) if query is not None and query.isdigit() else None
            if converted_query is not None:
                conditions.append([
                    snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE
                ])
                conditions.append([
                    snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE
                ])
        elif key == PROJECT_ALIAS:
            project_filters = {
                "id__in": projects,
            }
            if query:
                project_filters["slug__icontains"] = query
            project_queryset = Project.objects.filter(
                **project_filters).values("id", "slug")
            project_slugs = {
                project["id"]: project["slug"]
                for project in project_queryset
            }
            if project_queryset.exists():
                projects = [project["id"] for project in project_queryset]
                snuba_key = "project_id"
                dataset = Dataset.Discover
        else:
            if snuba_key in BLACKLISTED_COLUMNS:
                snuba_key = "tags[%s]" % (key, )

            if query:
                conditions.append([snuba_key, "LIKE", u"%{}%".format(query)])
            else:
                conditions.append([snuba_key, "!=", ""])

        filters = {"project_id": projects}
        if environments:
            filters["environment"] = environments

        results = snuba.query(
            dataset=dataset,
            start=start,
            end=end,
            groupby=[snuba_key],
            filter_keys=filters,
            aggregations=[
                ["count()", "", "times_seen"],
                ["min", "timestamp", "first_seen"],
                ["max", "timestamp", "last_seen"],
            ],
            conditions=conditions,
            orderby=order_by,
            # TODO: This means they can't actually paginate all TagValues.
            limit=1000,
            arrayjoin=snuba.get_arrayjoin(snuba_key),
            referrer="tagstore.get_tag_value_paginator_for_projects",
        )

        # With transaction_status we need to map the ids back to their names
        if transaction_status:
            results = OrderedDict([
                (SPAN_STATUS_CODE_TO_NAME[result_key], data)
                for result_key, data in six.iteritems(results)
            ])
        # With project names we map the ids back to the project slugs
        elif key == PROJECT_ALIAS:
            results = OrderedDict([(project_slugs[value], data)
                                   for value, data in six.iteritems(results)])

        tag_values = [
            TagValue(key=key,
                     value=six.text_type(value),
                     **fix_tag_value_data(data))
            for value, data in six.iteritems(results)
        ]

        desc = order_by.startswith("-")
        score_field = order_by.lstrip("-")
        return SequencePaginator(
            [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv)
             for tv in tag_values],
            reverse=desc,
        )
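
The dataset fallback at the top of this example (resolve against Events first and only switch to Discover when Events would treat the key as a plain tag but Discover knows it as a real column) can be summarized with a small sketch; pick_dataset and fake_resolver are illustrative assumptions, not Sentry APIs.

def pick_dataset(key, resolve):
    # resolve(key, dataset) -> snuba column name; "tags[...]" means the dataset
    # has no dedicated column for this key.
    column = resolve(key, "events")
    if column.startswith("tags["):
        discover_column = resolve(key, "discover")
        if not discover_column.startswith("tags["):
            return "discover", discover_column
    return "events", column


def fake_resolver(key, dataset):
    # Pretend only Discover knows "transaction.duration" as a real column.
    if (key, dataset) == ("transaction.duration", "discover"):
        return "duration"
    return "tags[%s]" % key


pick_dataset("transaction.duration", fake_resolver)  # -> ("discover", "duration")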