def __init__(
    self,
    environment_ids=None,
    start=None,
    end=None,
    search_filters=None,
    collapse=None,
    expand=None,
    organization_id=None,
    project_ids=None,
):
    """Capture query bounds and Snuba conditions derived from search filters.

    Sets up:
      * ``self.environment_ids`` — environments to scope the query to.
      * ``self.start`` / ``self.end`` — the effective time window, tightened by
        any ``date`` search filters (max of lower bounds, min of upper bounds).
      * ``self.conditions`` — Snuba conditions converted from ``search_filters``,
        skipping fields listed in ``self.skip_snuba_fields``.

    ``collapse`` / ``expand`` are forwarded to the superclass unchanged.
    """
    super().__init__(collapse=collapse, expand=expand)

    # Local import to avoid a circular import at module load time
    # (presumably — TODO confirm against the module graph).
    from sentry.search.snuba.executors import get_search_filter

    self.environment_ids = environment_ids

    # XXX: We copy this logic from `PostgresSnubaQueryExecutor.query`. Ideally we
    # should try and encapsulate this logic, but if you're changing this, change it
    # there as well.
    self.start = None
    start_params = [
        _f for _f in [start, get_search_filter(search_filters, "date", ">")] if _f
    ]
    if start_params:
        # `start_params` is already filtered for truthiness above, so take the
        # max directly (the previous re-filter here was redundant and
        # inconsistent with the `end` branch below).
        self.start = max(start_params)

    self.end = None
    end_params = [
        _f for _f in [end, get_search_filter(search_filters, "date", "<")] if _f
    ]
    if end_params:
        self.end = min(end_params)

    # Convert every non-skipped search filter into a Snuba condition; an absent
    # filter list means no conditions at all.
    self.conditions = (
        [
            convert_search_filter_to_snuba_query(
                search_filter,
                params={
                    "organization_id": organization_id,
                    "project_id": project_ids,
                    "environment_id": environment_ids,
                },
            )
            for search_filter in search_filters
            if search_filter.key.name not in self.skip_snuba_fields
        ]
        if search_filters is not None
        else []
    )
def snuba_search(
    self,
    start,
    end,
    project_ids,
    environment_ids,
    sort_field,
    organization_id,
    cursor=None,
    group_ids=None,
    limit=None,
    offset=0,
    get_sample=False,
    search_filters=None,
):
    """Query Snuba for groups matching the given filters.

    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.

    When ``get_sample`` is True, rows are ordered by a per-query hash
    ("sample") instead of ``sort_field``, giving a stable pseudo-random
    sample, and FINAL/turbo behavior is adjusted accordingly.
    """
    filters = {"project_id": project_ids}

    environments = None
    if environment_ids is not None:
        filters["environment"] = environment_ids
        # Snuba filters on environment *names*, so resolve ids -> names here.
        environments = list(
            Environment.objects.filter(
                organization_id=organization_id, id__in=environment_ids
            ).values_list("name", flat=True)
        )

    if group_ids:
        filters["group_id"] = sorted(group_ids)

    conditions = []
    having = []
    # Fix: `search_filters` defaults to None; guard the iteration so callers
    # that omit it don't hit a TypeError (matches the None-handling in
    # `__init__`).
    for search_filter in search_filters or ():
        if (
            # Don't filter on postgres fields here, they're not available
            search_filter.key.name in self.postgres_only_fields
            or
            # We special case date
            search_filter.key.name == "date"
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(
            search_filter,
            params={
                "organization_id": organization_id,
                "project_id": project_ids,
                "environment": environments,
            },
        )
        converted_filter = self._transform_converted_filter(
            search_filter, converted_filter, project_ids, environment_ids
        )
        if converted_filter is not None:
            # Ensure that no user-generated tags that clashes with aggregation_defs is added to having
            if search_filter.key.name in self.aggregation_defs and not search_filter.key.is_tag:
                having.append(converted_filter)
            else:
                conditions.append(converted_filter)

    # We always need the sort aggregation, "total", whatever the sort depends
    # on, and any aggregation referenced by a HAVING clause.
    extra_aggregations = self.dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, "total"] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregation = self.aggregation_defs[alias]
        if callable(aggregation):
            # TODO: If we want to expand this pattern we should probably figure out
            # more generic things to pass here.
            aggregation = aggregation(start, end)
        aggregations.append(aggregation + [alias])

    if cursor is not None:
        having.append((sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

    selected_columns = []
    if get_sample:
        # Hash the conditions so the sample ordering is stable for a given
        # query but differs between queries.
        query_hash = md5(json.dumps(conditions).encode("utf-8")).hexdigest()[:8]
        selected_columns.append(["cityHash64", [f"'{query_hash}'", "group_id"], "sample"])
        sort_field = "sample"
        orderby = [sort_field]
        referrer = "search_sample"
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = [
            f"-{sort_field}",
            "group_id",
        ]  # ensure stable sort within the same score
        referrer = "search"

    snuba_results = snuba.aliased_query(
        dataset=self.dataset,
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=["group_id"],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
        condition_resolver=snuba.get_snuba_column_name,
    )
    rows = snuba_results["data"]
    total = snuba_results["totals"]["total"]

    if not get_sample:
        metrics.timing("snuba.search.num_result_groups", len(rows))

    return [(row["group_id"], row[sort_field]) for row in rows], total