def __init__(self, environment_ids=None, start=None, end=None, search_filters=None):
    from sentry.search.snuba.executors import get_search_filter

    self.environment_ids = environment_ids

    # XXX: We copy this logic from `PostgresSnubaQueryExecutor.query`. Ideally we
    # should try and encapsulate this logic, but if you're changing this, change it
    # there as well.
    self.start = None
    start_params = [_f for _f in [start, get_search_filter(search_filters, "date", ">")] if _f]
    if start_params:
        self.start = max(start_params)

    self.end = None
    end_params = [_f for _f in [end, get_search_filter(search_filters, "date", "<")] if _f]
    if end_params:
        self.end = min(end_params)

    self.conditions = (
        [
            convert_search_filter_to_snuba_query(search_filter)
            for search_filter in search_filters
            if search_filter.key.name not in self.skip_snuba_fields
        ]
        if search_filters is not None
        else []
    )
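# A minimal sketch of the date-window clamping above, assuming both explicit
# `start`/`end` arguments and a `date` search filter can be present: the
# effective window is their intersection, i.e. the max of the lower bounds and
# the min of the upper bounds. All datetimes here are illustrative.
from datetime import datetime

explicit_start = datetime(2020, 1, 1)
filter_start = datetime(2020, 1, 5)  # e.g. extracted from a `date:>...` filter
explicit_end = datetime(2020, 2, 1)
filter_end = None                    # no `date:<...` filter supplied

start_params = [p for p in [explicit_start, filter_start] if p]
effective_start = max(start_params) if start_params else None  # 2020-01-05

end_params = [p for p in [explicit_end, filter_end] if p]
effective_end = min(end_params) if end_params else None        # 2020-02-01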
def __init__(self, environment_ids=None, start=None, end=None, search_filters=None):
    self.environment_ids = environment_ids
    self.start = start
    self.end = end
    self.conditions = (
        [
            convert_search_filter_to_snuba_query(search_filter)
            for search_filter in search_filters
            if search_filter.key.name not in self.skip_snuba_fields
        ]
        if search_filters is not None
        else []
    )
def snuba_search(
    start,
    end,
    project_ids,
    environment_ids,
    sort_field,
    cursor=None,
    candidate_ids=None,
    limit=None,
    offset=0,
    get_sample=False,
    search_filters=None,
):
    """
    This function doesn't strictly benefit from or require being pulled out
    of the main query method above, but the query method is already large
    and this function at least extracts most of the Snuba-specific logic.

    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.
    """
    filters = {"project_id": project_ids}

    if environment_ids is not None:
        filters["environment"] = environment_ids

    if candidate_ids:
        filters["group_id"] = sorted(candidate_ids)

    conditions = []
    having = []
    for search_filter in search_filters:
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields
            # We special case date
            or search_filter.key.name == "date"
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tag that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, "total"] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append((sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

    selected_columns = []
    if get_sample:
        query_hash = md5(repr(conditions).encode("utf-8")).hexdigest()[:8]
        selected_columns.append(("cityHash64", ("'{}'".format(query_hash), "group_id"), "sample"))
        sort_field = "sample"
        orderby = [sort_field]
        referrer = "search_sample"
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ["-{}".format(sort_field), "group_id"]  # ensure stable sort within the same score
        referrer = "search"

    snuba_results = snuba.dataset_query(
        dataset=Dataset.Events,
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=["group_id"],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results["data"]
    total = snuba_results["totals"]["total"]

    if not get_sample:
        metrics.timing("snuba.search.num_result_groups", len(rows))

    return [(row["group_id"], row[sort_field]) for row in rows], total
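# A hypothetical invocation of the function above; the project id, date range,
# and sort field are illustrative, and `aggregation_defs` in the surrounding
# module is assumed to define a "last_seen" alias. Passing an empty
# `search_filters` list yields no extra conditions or HAVING clauses.
from datetime import datetime, timedelta

now = datetime.utcnow()
group_scores, total = snuba_search(
    start=now - timedelta(days=14),
    end=now,
    project_ids=[1],
    environment_ids=None,
    sort_field="last_seen",
    limit=100,
    search_filters=[],
)
# group_scores: [(group_id, score), ...] sorted descending by score;
# total: overall row count, used for pagination hit counts.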
def snuba_search(
    self,
    start,
    end,
    project_ids,
    environment_ids,
    sort_field,
    cursor=None,
    group_ids=None,
    limit=None,
    offset=0,
    get_sample=False,
    search_filters=None,
):
    """
    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.
    """
    filters = {"project_id": project_ids}

    if environment_ids is not None:
        filters["environment"] = environment_ids

    if group_ids:
        filters["group_id"] = sorted(group_ids)

    conditions = []
    having = []
    for search_filter in search_filters:
        if (
            # Don't filter on postgres fields here, they're not available
            search_filter.key.name in self.postgres_only_fields
            # We special case date
            or search_filter.key.name == "date"
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)
        converted_filter = self._transform_converted_filter(
            search_filter, converted_filter, project_ids, environment_ids
        )
        if converted_filter is not None:
            # Ensure that no user-generated tag that clashes with aggregation_defs is added to having
            if search_filter.key.name in self.aggregation_defs and not search_filter.key.is_tag:
                having.append(converted_filter)
            else:
                conditions.append(converted_filter)

    extra_aggregations = self.dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, "total"] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregation = self.aggregation_defs[alias]
        if callable(aggregation):
            # TODO: If we want to expand this pattern we should probably figure out
            # more generic things to pass here.
            aggregation = aggregation(start, end)
        aggregations.append(aggregation + [alias])

    if cursor is not None:
        having.append((sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

    selected_columns = []
    if get_sample:
        query_hash = md5(json.dumps(conditions).encode("utf-8")).hexdigest()[:8]
        selected_columns.append(
            ("cityHash64", ("'{}'".format(query_hash), "group_id"), "sample")
        )
        sort_field = "sample"
        orderby = [sort_field]
        referrer = "search_sample"
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = [
            "-{}".format(sort_field),
            "group_id",  # ensure stable sort within the same score
        ]
        referrer = "search"

    snuba_results = snuba.aliased_query(
        dataset=self.dataset,
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=["group_id"],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
        condition_resolver=snuba.get_snuba_column_name,
    )
    rows = snuba_results["data"]
    total = snuba_results["totals"]["total"]

    if not get_sample:
        metrics.timing("snuba.search.num_result_groups", len(rows))

    return [(row["group_id"], row[sort_field]) for row in rows], total
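# A sketch of the callable-aggregation pattern handled above: an entry in
# `aggregation_defs` may be a function of (start, end) rather than a static
# [expression, column] pair, so time-window-dependent scores can be built at
# query time. The "trend" alias and its ClickHouse expression are hypothetical.
from datetime import datetime, timedelta

def trend_aggregation(start, end):
    # Hypothetical: scale the score by the query window length in seconds.
    window = int((end - start).total_seconds())
    return ["multiply(count(), {})".format(window), ""]

aggregation_defs = {
    "times_seen": ["count()", ""],
    "trend": trend_aggregation,
}

end = datetime.utcnow()
start = end - timedelta(days=1)
aggregation = aggregation_defs["trend"]
if callable(aggregation):
    aggregation = aggregation(start, end)
print(aggregation + ["trend"])  # -> ['multiply(count(), 86400)', '', 'trend']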
def snuba_search(start, end, project_ids, environment_ids, sort_field,
                 cursor=None, candidate_ids=None, limit=None, offset=0,
                 get_sample=False, search_filters=None):
    """
    This function doesn't strictly benefit from or require being pulled out
    of the main query method above, but the query method is already large
    and this function at least extracts most of the Snuba-specific logic.

    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.
    """
    filters = {
        'project_id': project_ids,
    }

    if environment_ids is not None:
        filters['environment'] = environment_ids

    if candidate_ids:
        filters['issue'] = candidate_ids

    conditions = []
    having = []
    for search_filter in search_filters:
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields or
            # We special case date
            search_filter.key.name == 'date'
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tag that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, 'total'] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append((sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    selected_columns = []
    if get_sample:
        query_hash = md5(repr(conditions).encode('utf-8')).hexdigest()[:8]
        selected_columns.append(('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'))
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ['-{}'.format(sort_field), 'issue']  # ensure stable sort within the same score
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    return [(row['issue'], row[sort_field]) for row in rows], total
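# A sketch of the deterministic sampling trick in the `get_sample` branch above:
# hashing the serialized conditions yields a stable per-query salt, and
# `cityHash64(salt, issue)` then assigns each group a stable pseudo-random
# score. Ordering by that score with a LIMIT samples groups independently of
# how many events each group has. Only the salt derivation runs in Python; the
# hashing itself happens in ClickHouse.
from hashlib import md5

conditions = [['tags[browser]', '=', 'Chrome']]
query_hash = md5(repr(conditions).encode('utf-8')).hexdigest()[:8]
sample_column = ('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample')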
def get(self, request, organization):
    """
    List an Organization's Issues
    `````````````````````````````
    Return a list of issues (groups) bound to an organization. All parameters
    are supplied as query string parameters.

    A default query of ``is:unresolved`` is applied. To return results
    with other statuses send a new query value (i.e. ``?query=`` for all
    results).

    The ``groupStatsPeriod`` parameter can be used to select the timeline
    stats which should be present. Possible values are: '' (disable),
    '24h', '14d'

    The ``statsPeriod`` parameter can be used to select a date window starting
    from now. Ex. ``14d``.

    The ``start`` and ``end`` parameters can be used to select an absolute
    date period to fetch issues from.

    :qparam string statsPeriod: an optional stat period (can be one of
                                ``"24h"``, ``"14d"``, and ``""``).
    :qparam string groupStatsPeriod: an optional stat period (can be one of
                                     ``"24h"``, ``"14d"``, and ``""``).
    :qparam string start: Beginning date. You must also provide ``end``.
    :qparam string end: End date. You must also provide ``start``.
    :qparam bool shortIdLookup: if this is set to true then short IDs are
                                looked up by this function as well. This
                                can cause the return value of the function
                                to return an event issue of a different
                                project which is why this is an opt-in.
                                Set to `1` to enable.
    :qparam querystring query: an optional Sentry structured search
                               query. If not provided an implied
                               ``"is:unresolved"`` is assumed.
    :pparam string organization_slug: the slug of the organization the
                                      issues belong to.
    :auth: required
    """
    stats_period = request.GET.get("groupStatsPeriod")
    if stats_period not in (None, "", "24h", "14d"):
        return Response({"detail": ERR_INVALID_STATS_PERIOD}, status=400)
    elif stats_period is None:
        # default
        stats_period = "24h"
    elif stats_period == "":
        # disable stats
        stats_period = None

    try:
        start, end = get_date_range_from_params(request.GET)
    except InvalidParams as e:
        return Response({"detail": six.text_type(e)}, status=400)

    environments = self.get_environments(request, organization)

    serializer = functools.partial(
        StreamGroupSerializerSnuba,
        environment_ids=[env.id for env in environments],
        stats_period=stats_period,
    )

    projects = self.get_projects(request, organization)
    project_ids = [p.id for p in projects]

    if not projects:
        return Response([])

    if len(projects) > 1 and not features.has(
        "organizations:global-views", organization, actor=request.user
    ):
        return Response(
            {"detail": "You do not have the multi project stream feature enabled"}, status=400
        )

    # we ignore date range for both short id and event ids
    query = request.GET.get("query", "").strip()
    if query:
        # check to see if we've got an event ID
        event_id = normalize_event_id(query)
        if event_id:
            # For a direct hit lookup we want to use any passed project ids
            # (we've already checked permissions on these) plus any other
            # projects that the user is a member of. This gives us a better
            # chance of returning the correct result, even if the wrong
            # project is selected.
            direct_hit_projects = set(project_ids) | set(
                [project.id for project in request.access.projects]
            )
            groups = list(Group.objects.filter_by_event_id(direct_hit_projects, event_id))
            if len(groups) == 1:
                response = Response(
                    serialize(groups, request.user, serializer(matching_event_id=event_id))
                )
                response["X-Sentry-Direct-Hit"] = "1"
                return response

            if groups:
                return Response(serialize(groups, request.user, serializer()))

        group = get_by_short_id(organization.id, request.GET.get("shortIdLookup"), query)
        if group is not None:
            # check all projects user has access to
            if request.access.has_project_access(group.project):
                response = Response(serialize([group], request.user, serializer()))
                response["X-Sentry-Direct-Hit"] = "1"
                return response

    # If group ids specified, just ignore any query components
    try:
        group_ids = set(map(int, request.GET.getlist("group")))
    except ValueError:
        return Response({"detail": "Group ids must be integers"}, status=400)

    if group_ids:
        groups = list(Group.objects.filter(id__in=group_ids, project_id__in=project_ids))
        if any(g for g in groups if not request.access.has_project_access(g.project)):
            raise PermissionDenied
        return Response(serialize(groups, request.user, serializer()))

    try:
        cursor_result, query_kwargs = self._search(
            request,
            organization,
            projects,
            environments,
            {"count_hits": True, "date_to": end, "date_from": start},
        )
    except (ValidationError, discover.InvalidSearchQuery) as exc:
        return Response({"detail": six.text_type(exc)}, status=400)

    results = list(cursor_result)

    lifetime_stats = serialize(results, request.user, serializer())

    if features.has("organizations:dynamic-issue-counts", organization, actor=request.user):
        snuba_filters = []
        if "search_filters" in query_kwargs and query_kwargs["search_filters"] is not None:
            snuba_filters = [
                convert_search_filter_to_snuba_query(search_filter)
                for search_filter in query_kwargs["search_filters"]
                if search_filter.key.name not in self.skip_snuba_fields
            ]
        context = serialize(results, request.user, serializer(start=start, end=end))
        if snuba_filters:
            filtered_stats = serialize(
                results,
                request.user,
                serializer(start=start, end=end, snuba_filters=snuba_filters),
            )
        else:
            filtered_stats = None
        for idx, ctx in enumerate(context):
            ctx["lifetime"] = lifetime_stats[idx]
            if snuba_filters:
                ctx["filtered"] = filtered_stats[idx]
    else:
        # context was previously the lifetime stats, with no filters/dynamic start-end values
        context = lifetime_stats

    # HACK: remove auto resolved entries
    # TODO: We should try to integrate this into the search backend, since
    # this can cause us to arbitrarily return fewer results than requested.
    status = [
        search_filter
        for search_filter in query_kwargs.get("search_filters", [])
        if search_filter.key.name == "status"
    ]
    if status and status[0].value.raw_value == GroupStatus.UNRESOLVED:
        context = [r for r in context if r["status"] == "unresolved"]

    response = Response(context)

    self.add_cursor_headers(request, response, cursor_result)

    # TODO(jess): add metrics that are similar to project endpoint here
    return response
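# A hypothetical client-side sketch of calling the endpoint above; the base
# URL, organization slug, and token are placeholders, and the `requests`
# library is assumed to be available.
import requests

resp = requests.get(
    "https://sentry.example.com/api/0/organizations/my-org/issues/",
    headers={"Authorization": "Bearer <auth-token>"},
    params={"statsPeriod": "14d", "query": "is:unresolved", "groupStatsPeriod": "24h"},
)
issues = resp.json()
# A direct event-ID or short-ID hit is flagged via the `X-Sentry-Direct-Hit: 1`
# response header, as set in the handler above.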
def snuba_search(start, end, project_ids, environment_ids, tags, sort_field,
                 cursor=None, candidate_ids=None, limit=None, offset=0,
                 get_sample=False, search_filters=None, use_new_filters=False,
                 **parameters):
    """
    This function doesn't strictly benefit from or require being pulled out
    of the main query method above, but the query method is already large
    and this function at least extracts most of the Snuba-specific logic.

    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.
    """
    from sentry.search.base import ANY

    filters = {
        'project_id': project_ids,
    }

    if environment_ids is not None:
        filters['environment'] = environment_ids

    if candidate_ids is not None:
        filters['issue'] = candidate_ids

    conditions = []
    if use_new_filters:
        having = []
        for search_filter in search_filters:
            if search_filter.key.name in issue_only_fields:
                # Don't filter on issue fields here, they're not available
                continue
            converted_filter = convert_search_filter_to_snuba_query(search_filter)
            if search_filter.key.name in aggregation_defs:
                having.append(converted_filter)
            else:
                conditions.append(converted_filter)
    else:
        having = SnubaConditionBuilder({
            'age_from': ScalarCondition('first_seen', '>'),
            'age_to': ScalarCondition('first_seen', '<'),
            'last_seen_from': ScalarCondition('last_seen', '>'),
            'last_seen_to': ScalarCondition('last_seen', '<'),
            'times_seen': CallbackCondition(
                lambda times_seen: ('times_seen', '=', times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', '>'),
            'times_seen_upper': ScalarCondition('times_seen', '<'),
        }).build(parameters)

        for tag, val in sorted(tags.items()):
            col = u'tags[{}]'.format(tag)
            if val == ANY:
                conditions.append((col, '!=', ''))
            else:
                conditions.append((col, '=', val))

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, 'total'] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append((sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    selected_columns = []
    if get_sample:
        # Get a random sample of matching groups. Because we hash a per-query
        # seed together with the group id, we are testing against a single
        # pseudo-random value per group, and so the sample is independent of
        # the number of events in a group. Since we sample via
        # `ORDER BY sample LIMIT x`, we will always grab the full result set
        # if there are fewer than x total results.
        query_hash = md5(repr(conditions).encode('utf-8')).hexdigest()[:8]
        selected_columns.append(('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'))
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ['-{}'.format(sort_field), 'issue']  # ensure stable sort within the same score
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    return [(row['issue'], row[sort_field]) for row in rows], total
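# A hedged sketch of the legacy path above (`use_new_filters=False`): each
# entry in `parameters` is routed through the SnubaConditionBuilder, producing
# HAVING clauses shaped like the CallbackCondition example, i.e.
# (column, operator, value) tuples against the per-group aggregates. The
# values here are illustrative.
from datetime import datetime, timedelta

parameters = {
    'times_seen_lower': 10,                             # -> ('times_seen', '>', 10)
    'age_from': datetime.utcnow() - timedelta(days=7),  # -> ('first_seen', '>', <datetime>)
}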