def query(
    self,
    projects: Sequence[Project],
    retention_window_start: Optional[datetime],
    group_queryset: QuerySet,
    environments: Sequence[Environment],
    sort_by: str,
    limit: int,
    cursor: Optional[Cursor],
    count_hits: bool,
    paginator_options: Mapping[str, Any],
    search_filters: Sequence[SearchFilter],
    date_from: Optional[datetime],
    date_to: Optional[datetime],
    max_hits=None,
) -> CursorResult:
    """Search groups through a Snuba query joining the event and group entities.

    The method builds one SnQL query that groups events by group id and
    orders them by the aggregation selected via ``sort_by``, optionally runs
    a second query to count distinct matching groups, paginates the rows
    with ``SequencePaginator`` and finally re-fetches the resulting ids
    through ``group_queryset``.

    Args:
        projects: Projects whose ids restrict the event ``project_id``.
        retention_window_start: Forwarded to ``self.calculate_start_end``.
        group_queryset: Queryset used to load the returned group ids
            (``in_bulk``); ids it does not return are dropped from the page.
        environments: When non-empty, events are restricted to these
            environment names.
        sort_by: Key into ``self.sort_strategies``; the mapped value selects
            the entry of ``self.aggregation_defs`` used as the sort score.
        limit: Page size. The Snuba query requests ``limit + 1`` rows.
        cursor: When given, a ``HAVING`` condition on the sort score is
            added (``>=`` for a previous-page cursor, ``<=`` otherwise).
        count_hits: Whether to run the additional distinct-group count query.
        paginator_options: Extra keyword arguments for ``SequencePaginator``.
        search_filters: Filters checked by ``validate_cdc_search_filters``;
            each one becomes an ``IN`` condition on the group entity.
        date_from: Forwarded to ``self.calculate_start_end``.
        date_to: Forwarded to ``self.calculate_start_end``.
        max_hits: Forwarded to the paginator's ``get_result``.

    Returns:
        The paginator result, with ``results`` replaced by the objects that
        ``group_queryset`` returned for the paginated ids, or
        ``self.empty_result`` when the computed time range is empty.

    Raises:
        InvalidQueryForExecutor: If ``validate_cdc_search_filters`` rejects
            ``search_filters``.
    """
    if not validate_cdc_search_filters(search_filters):
        raise InvalidQueryForExecutor("Search filters invalid for this query executor")

    start, end, retention_date = self.calculate_start_end(
        retention_window_start, search_filters, date_from, date_to
    )

    if start == retention_date and end == retention_date:
        # Both `start` and `end` must have been trimmed to `retention_date`,
        # so this entire search was against a time range that is outside of
        # retention. We'll return empty results to maintain backwards compatibility
        # with Django search (for now).
        return self.empty_result

    if start >= end:
        # TODO: This maintains backwards compatibility with Django search, but
        # in the future we should find a way to notify the user that their search
        # is invalid.
        return self.empty_result

    e_event = self.entities["event"]
    e_group = self.entities["group"]

    where_conditions = [
        Condition(Column("project_id", e_event), Op.IN, [p.id for p in projects]),
        Condition(Column("timestamp", e_event), Op.GTE, start),
        Condition(Column("timestamp", e_event), Op.LT, end),
    ]
    # TODO: This is still basically only handling status, handle this better once we introduce
    # more conditions.
    where_conditions.extend(
        Condition(
            Column(search_filter.key.name, e_group),
            Op.IN,
            search_filter.value.raw_value,
        )
        for search_filter in search_filters
    )

    if environments:
        # TODO: Should this be handled via filter_keys, once we have a snql compatible version?
        where_conditions.append(
            Condition(Column("environment", e_event), Op.IN, [e.name for e in environments])
        )

    sort_func = self.aggregation_defs[self.sort_strategies[sort_by]]

    having = []
    if cursor is not None:
        op = Op.GTE if cursor.is_prev else Op.LTE
        having.append(Condition(sort_func, op, cursor.value))

    # Both the page query and the hit-count query use the same join.
    join = Join([Relationship(e_event, "grouped", e_group)])

    query = Query(
        "events",
        match=join,
        select=[
            Column("id", e_group),
            replace(sort_func, alias="score"),
        ],
        where=where_conditions,
        groupby=[Column("id", e_group)],
        having=having,
        orderby=[OrderBy(sort_func, direction=Direction.DESC)],
        # One row beyond `limit` is requested; presumably this lets the
        # paginator detect whether another page exists.
        limit=Limit(limit + 1),
    )
    data = snuba.raw_snql_query(query, referrer="search.snuba.cdc_search.query")["data"]

    hits = None
    if count_hits:
        # Only build the count query when the caller actually asked for hits.
        hits_query = Query(
            "events",
            match=join,
            select=[
                Function("uniq", [Column("id", e_group)], alias="count"),
            ],
            where=where_conditions,
        )
        hits = snuba.raw_snql_query(
            hits_query, referrer="search.snuba.cdc_search.hits"
        )["data"][0]["count"]

    paginator_results = SequencePaginator(
        [(row["score"], row["g.id"]) for row in data],
        reverse=True,
        **paginator_options,
    ).get_result(limit, cursor, known_hits=hits, max_hits=max_hits)

    # We filter against `group_queryset` here so that we recheck all conditions in Postgres.
    # Since replay between Postgres and Clickhouse can happen, we might get back results that
    # have changed state in Postgres. By rechecking them we guarantee that any returned results
    # have the correct state.
    # TODO: This can result in us returning less than a full page of results, but shouldn't
    # affect cursors. If we want to, we can iterate and query snuba until we manage to get a
    # full page. In practice, this will likely only skip a couple of results at worst, and
    # probably not be noticeable to the user, so holding off for now to reduce complexity.
    groups = group_queryset.in_bulk(paginator_results.results)
    paginator_results.results = [groups[k] for k in paginator_results.results if k in groups]
    return paginator_results