def __search_events_legacy(self, request, project):
    """Search a project's events through the legacy (ORM-backed) event store.

    Events are optionally narrowed by a case-insensitive substring match of
    the ``query`` GET parameter against the event message, and events older
    than the organization's retention window are dropped. The result is the
    paginated response, ordered newest first.
    """
    from sentry import quotas
    from sentry.api.paginator import DateTimePaginator
    from sentry.models import Event

    queryset = Event.objects.filter(project_id=project.id)

    search_term = request.GET.get('query')
    if search_term:
        queryset = queryset.filter(message__icontains=search_term)

    # A falsy retention value means no retention cutoff is applied.
    retention_days = quotas.get_event_retention(organization=project.organization)
    if retention_days:
        oldest_allowed = timezone.now() - timedelta(days=retention_days)
        queryset = queryset.filter(datetime__gte=oldest_allowed)

    def serialize_page(results):
        return serialize(results, request.user)

    return self.paginate(
        request=request,
        queryset=queryset,
        order_by='-datetime',
        on_results=serialize_page,
        paginator_cls=DateTimePaginator,
    )
def insert(self, group, event, is_new, is_sample, is_regression,
           is_new_group_environment, primary_hash, skip_consume=False):
    """Run the superclass insert, then send an ``insert`` message for ``event``.

    The message carries two dicts: the event payload (including the
    organization's retention in days) and the state flags for the event.
    """
    # The superclass insert() goes first so that it is called regardless of
    # what happens while attempting to send to Kafka.
    super(KafkaEventStream, self).insert(
        group, event, is_new, is_sample, is_regression,
        is_new_group_environment, primary_hash, skip_consume,
    )

    project = event.project
    # The Organization is constructed from the id alone instead of being read
    # from ``project.organization``.
    organization = Organization(project.organization_id)
    retention_days = quotas.get_event_retention(organization=organization)

    event_payload = {
        'group_id': event.group_id,
        'event_id': event.event_id,
        'organization_id': project.organization_id,
        'project_id': event.project_id,
        'message': event.message,
        'platform': event.platform,
        'datetime': event.datetime,
        'data': dict(event.data.items()),
        'primary_hash': primary_hash,
        'retention_days': retention_days,
    }
    state_flags = {
        'is_new': is_new,
        'is_sample': is_sample,
        'is_regression': is_regression,
        'is_new_group_environment': is_new_group_environment,
    }
    self._send(project.id, 'insert', extra_data=(event_payload, state_flags))
def _get_events_legacy(
    self,
    request,
    group,
    environments,
    query,
    tags,
    start,
    end,
):
    """List a group's events from the legacy (ORM-backed) event store.

    ``query`` matches the message as a case-insensitive substring, or the
    event id exactly when the query looks like an event id. ``tags`` are
    resolved to an event filter through tagstore; an empty filter yields an
    empty response. ``start``/``end`` bound the event datetime, and events
    older than the organization's retention window are excluded.
    """
    queryset = Event.objects.filter(group_id=group.id)

    if query:
        condition = Q(message__icontains=query)
        if is_event_id(query):
            condition |= Q(event_id__exact=query)
        queryset = queryset.filter(condition)

    if tags:
        environment_ids = [env.id for env in environments]
        event_filter = tagstore.get_group_event_filter(
            group.project_id,
            group.id,
            environment_ids,
            tags,
            start,
            end,
        )
        # No filter from tagstore: respond with an empty list right away.
        if not event_filter:
            return Response([])
        queryset = queryset.filter(**event_filter)

    # The date bounds are applied here as well, in case no tag filter ran.
    if start:
        queryset = queryset.filter(datetime__gte=start)
    if end:
        queryset = queryset.filter(datetime__lte=end)

    # Drop events that fall outside the retention period.
    retention_days = quotas.get_event_retention(organization=group.project.organization)
    if retention_days:
        oldest_allowed = timezone.now() - timedelta(days=retention_days)
        queryset = queryset.filter(datetime__gte=oldest_allowed)

    def serialize_page(results):
        return serialize(results, request.user)

    return self.paginate(
        request=request,
        queryset=queryset,
        order_by='-datetime',
        on_results=serialize_page,
        paginator_cls=DateTimePaginator,
    )
def insert(
    self,
    group,
    event,
    is_new,
    is_regression,
    is_new_group_environment,
    primary_hash,
    received_timestamp,  # type: float
    skip_consume=False,
):
    """Send an ``insert`` message for ``event``.

    Tag keys listed in ``self.UNEXPECTED_TAG_KEYS`` that appear in the raw
    event data are logged as an error; the message is sent either way. The
    received timestamp travels as the ``Received-Timestamp`` header.
    """
    project = event.project
    retention_days = quotas.get_event_retention(organization=project.organization)

    event_data = event.get_raw_data()

    tag_pairs = get_path(event_data, "tags", filter=True) or []
    unexpected_tags = {
        key for (key, _value) in tag_pairs if key in self.UNEXPECTED_TAG_KEYS
    }
    if unexpected_tags:
        logger.error("%r received unexpected tags: %r", self, unexpected_tags)

    event_payload = {
        "group_id": event.group_id,
        "event_id": event.event_id,
        "organization_id": project.organization_id,
        "project_id": event.project_id,
        # TODO(mitsuhiko): We do not want to send this incorrect
        # message but this is what snuba needs at the moment.
        "message": event.search_message,
        "platform": event.platform,
        "datetime": event.datetime,
        "data": event_data,
        "primary_hash": primary_hash,
        "retention_days": retention_days,
    }
    state_flags = {
        "is_new": is_new,
        "is_regression": is_regression,
        "is_new_group_environment": is_new_group_environment,
        "skip_consume": skip_consume,
    }

    self._send(
        project.id,
        "insert",
        extra_data=(event_payload, state_flags),
        headers={'Received-Timestamp': six.text_type(received_timestamp)},
    )
def test_internal_relays_should_receive_full_configs(call_endpoint, default_project,
                                                     default_projectkey):
    """A full-config request returns the complete project config."""
    result, status_code = call_endpoint(full_config=True)
    assert status_code < 400

    # Sweeping assertion that no key in the config contains "-" or "_"
    # (i.e. no snake_case). Might need refining.
    offending_keys = {key for key in _get_all_keys(result) if "-" in key or "_" in key}
    assert not offending_keys

    cfg = safe.get_path(result, "configs", str(default_project.id))
    assert safe.get_path(cfg, "disabled") is False

    # Exactly one public key is expected.
    [public_key] = cfg["publicKeys"]
    assert public_key["publicKey"] == default_projectkey.public_key
    assert public_key["isEnabled"]
    assert "quotas" in public_key

    assert safe.get_path(cfg, "slug") == default_project.slug

    for date_field in ("lastChange", "lastFetch"):
        date_value = safe.get_path(cfg, date_field)
        assert _date_regex.match(date_value) is not None

    assert safe.get_path(cfg, "organizationId") == default_project.organization.id
    assert safe.get_path(cfg, "projectId") == default_project.id
    assert safe.get_path(cfg, "slug") == default_project.slug
    assert safe.get_path(cfg, "rev") is not None

    assert safe.get_path(cfg, "config", "trustedRelays") == []

    for present_path in (
        ("filterSettings",),
        ("groupingConfig", "enhancements"),
        ("groupingConfig", "id"),
    ):
        assert safe.get_path(cfg, "config", *present_path) is not None

    for pii_key in ("applications", "rules"):
        assert safe.get_path(cfg, "config", "piiConfig", pii_key) is None

    for scrub_flag in ("scrubData", "scrubDefaults", "scrubIpAddresses"):
        assert safe.get_path(cfg, "config", "datascrubbingSettings", scrub_flag) is True

    assert safe.get_path(cfg, "config", "datascrubbingSettings", "sensitiveFields") == []
    assert safe.get_path(cfg, "config", "quotas") == []

    # Event retention depends on settings, so compare against the actual
    # value. Likely `None` in dev, but the key must not be missing.
    expected_retention = quotas.get_event_retention(default_project.organization)
    assert cfg["config"]["eventRetention"] == expected_retention
def publish(self, group, event, is_new, is_sample, is_regression,
            is_new_group_environment, primary_hash, skip_consume=False):
    """Produce an ``insert`` message for ``event`` on ``self.publish_topic``.

    Failures while building or producing the message are logged as a
    warning and then re-raised.
    """
    project = event.project
    # The Organization is constructed from the id alone instead of being read
    # from ``project.organization``.
    organization = Organization(project.organization_id)
    retention_days = quotas.get_event_retention(organization=organization)

    # The producer has to be polled for delivery callbacks to fire. As a
    # result, the delay between a message being delivered (or failing) and
    # its callback running is roughly the time between publish calls. If that
    # turns out to be too long, the publisher should move to a background
    # thread that can poll more often without interfering with request
    # handling. (This `poll` does not act as a heartbeat for the purposes of
    # any sort of session expiration.)
    self.producer.poll(0.0)

    try:
        key = '%s:%s' % (event.project_id, event.event_id)
        event_payload = {
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': event.data.data,
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        }
        state_flags = {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
        }
        value = (EVENT_PROTOCOL_VERSION, 'insert', event_payload, state_flags)
        self.producer.produce(
            self.publish_topic,
            key=key.encode('utf-8'),
            value=json.dumps(value),
            on_delivery=self.delivery_callback,
        )
    except Exception as error:
        logger.warning('Could not publish event: %s', error, exc_info=True)
        raise
def insert(self, group, event, is_new, is_sample, is_regression,
           is_new_group_environment, primary_hash, skip_consume=False):
    """Send an ``insert`` message for ``event``.

    Tag keys listed in ``self.UNEXPECTED_TAG_KEYS`` that appear in the raw
    event data are logged as an error; the message is sent either way.
    """
    project = event.project
    retention_days = quotas.get_event_retention(organization=project.organization)

    event_data = event.get_raw_data()

    tag_pairs = get_path(event_data, 'tags', filter=True) or []
    unexpected_tags = {
        key for (key, _value) in tag_pairs if key in self.UNEXPECTED_TAG_KEYS
    }
    if unexpected_tags:
        logger.error('%r received unexpected tags: %r', self, unexpected_tags)

    event_payload = {
        'group_id': event.group_id,
        'event_id': event.event_id,
        'organization_id': project.organization_id,
        'project_id': event.project_id,
        # TODO(mitsuhiko): We do not want to send this incorrect
        # message but this is what snuba needs at the moment.
        'message': event.message,
        'platform': event.platform,
        'datetime': event.datetime,
        'data': event_data,
        'primary_hash': primary_hash,
        'retention_days': retention_days,
    }
    state_flags = {
        'is_new': is_new,
        'is_sample': is_sample,
        'is_regression': is_regression,
        'is_new_group_environment': is_new_group_environment,
        'skip_consume': skip_consume,
    }
    self._send(project.id, 'insert', extra_data=(event_payload, state_flags))
def outside_retention_with_modified_start(start, end, organization):
    """
    Check if a start-end datetime range is outside an organization's
    retention period.

    :param start: start of the range; may be timezone-aware or naive.
    :param end: end of the range; it is compared against the clamped start,
        so it must be aware or naive in the same way as ``start``.
    :param organization: organization whose retention (in days) is looked up.
    :return: a ``(is_outside, start)`` tuple. ``start`` is moved forward to
        the beginning of the retention window if it was older than that, and
        ``is_outside`` is True when the (possibly moved) start is after
        ``end``. With no retention configured, ``(False, start)`` is returned
        unchanged.
    """
    retention = quotas.get_event_retention(organization=organization)
    if not retention:
        return False, start

    # Need to support timezone-aware and naive datetimes since
    # Snuba API only deals in naive UTC.
    now = datetime.utcnow()
    if start.tzinfo:
        # utcnow() is naive but already expressed in UTC, so the zone is
        # attached with replace(). astimezone() would either treat the value
        # as local time (Python 3) or raise on a naive datetime (Python 2).
        now = now.replace(tzinfo=pytz.utc)

    start = max(start, now - timedelta(days=retention))

    return start > end, start
def insert(self, group, event, is_new, is_sample, is_regression,
           is_new_group_environment, primary_hash, skip_consume=False):
    """Send an ``insert`` message for ``event``.

    The superclass insert is called first, but only when the
    ``eventstream.kafka.send-post_process-task`` option is set. The message
    itself is sent unconditionally.
    """
    if options.get('eventstream.kafka.send-post_process-task'):
        super(KafkaEventStream, self).insert(
            group, event, is_new, is_sample, is_regression,
            is_new_group_environment, primary_hash, skip_consume,
        )

    project = event.project
    # The Organization is constructed from the id alone instead of being read
    # from ``project.organization``.
    organization = Organization(project.organization_id)
    retention_days = quotas.get_event_retention(organization=organization)

    event_payload = {
        'group_id': event.group_id,
        'event_id': event.event_id,
        'organization_id': project.organization_id,
        'project_id': event.project_id,
        # TODO(mitsuhiko): We do not want to send this incorrect
        # message but this is what snuba needs at the moment.
        'message': event.message,
        'platform': event.platform,
        'datetime': event.datetime,
        'data': dict(event.data.items()),
        'primary_hash': primary_hash,
        'retention_days': retention_days,
    }
    state_flags = {
        'is_new': is_new,
        'is_sample': is_sample,
        'is_regression': is_regression,
        'is_new_group_environment': is_new_group_environment,
        'skip_consume': skip_consume,
    }
    self._send(project.id, 'insert', extra_data=(event_payload, state_flags))
def publish(self, group, event, is_new, is_sample, is_regression,
            is_new_group_environment, primary_hash, skip_consume=False):
    """Produce an ``insert`` message for ``event`` on ``self.publish_topic``.

    Any failure while building or producing the message is logged as a
    warning and then re-raised.
    """
    project = event.project
    retention_days = quotas.get_event_retention(
        # Built from the id alone rather than read from the project.
        organization=Organization(project.organization_id),
    )

    # Delivery callbacks only fire when the producer is polled, so the delay
    # between a delivery (or delivery failure) and its callback is about as
    # long as the gap between publish calls. Should that prove too long, move
    # the publisher into a background thread that polls more frequently
    # without interfering with request handling. (This `poll` does not act as
    # a heartbeat for the purposes of any sort of session expiration.)
    self.producer.poll(0.0)

    try:
        message_key = '%s:%s' % (event.project_id, event.event_id)
        message_value = (
            EVENT_PROTOCOL_VERSION,
            'insert',
            {
                'group_id': event.group_id,
                'event_id': event.event_id,
                'organization_id': project.organization_id,
                'project_id': event.project_id,
                'message': event.message,
                'platform': event.platform,
                'datetime': event.datetime,
                'data': event.data.data,
                'primary_hash': primary_hash,
                'retention_days': retention_days,
            },
            {
                'is_new': is_new,
                'is_sample': is_sample,
                'is_regression': is_regression,
                'is_new_group_environment': is_new_group_environment,
            },
        )
        self.producer.produce(
            self.publish_topic,
            key=message_key.encode('utf-8'),
            value=json.dumps(message_value),
            on_delivery=self.delivery_callback,
        )
    except Exception as error:
        logger.warning('Could not publish event: %s', error, exc_info=True)
        raise
def insert(self, group, event, is_new, is_sample, is_regression,
           is_new_group_environment, primary_hash, skip_consume=False):
    """Run the superclass insert, then send an ``insert`` message for ``event``."""
    # Calling the superclass first ensures its insert() runs regardless of
    # what happens while attempting to send to Kafka.
    super(KafkaEventStream, self).insert(
        group, event, is_new, is_sample, is_regression,
        is_new_group_environment, primary_hash, skip_consume,
    )

    project = event.project
    retention_days = quotas.get_event_retention(
        # Built from the id alone rather than read from the project.
        organization=Organization(project.organization_id),
    )

    # First element: the event itself. Second element: its state flags.
    extra_data = (
        {
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': dict(event.data.items()),
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        },
        {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
        },
    )
    self._send(project.id, 'insert', extra_data=extra_data)
def publish(self, group, event, is_new, is_sample, is_regression,
            is_new_group_environment, primary_hash, skip_consume=False):
    """Publish an ``insert`` message for ``event`` through ``self.pubsub``.

    Any failure while building or publishing the message is logged as a
    warning and then re-raised.
    """
    project = event.project
    # The Organization is constructed from the id alone instead of being read
    # from ``project.organization``.
    organization = Organization(project.organization_id)
    retention_days = quotas.get_event_retention(organization=organization)

    try:
        key = '%s:%s' % (event.project_id, event.event_id)
        event_payload = {
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': event.data.data,
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        }
        state_flags = {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
        }
        value = (EVENT_PROTOCOL_VERSION, 'insert', event_payload, state_flags)
        self.pubsub.publish(
            self.publish_topic,
            key=key.encode('utf-8'),
            value=json.dumps(value),
        )
    except Exception as error:
        logger.warning('Could not publish event: %s', error, exc_info=True)
        raise
def get(self, request, project):
    """
    List a Project's Events
    ```````````````````````

    Return a list of events bound to a project.

    Note: This endpoint is experimental and may be removed without notice.

    :pparam string organization_slug: the slug of the organization the
                                      groups belong to.
    :pparam string project_slug: the slug of the project the groups
                                 belong to.
    """
    queryset = Event.objects.filter(project_id=project.id)

    # Optional case-insensitive substring search on the event message.
    search_term = request.GET.get('query')
    if search_term:
        queryset = queryset.filter(message__icontains=search_term)

    # Events older than the retention period are excluded; a falsy retention
    # value means no cutoff is applied.
    retention_days = quotas.get_event_retention(organization=project.organization)
    if retention_days:
        oldest_allowed = timezone.now() - timedelta(days=retention_days)
        queryset = queryset.filter(datetime__gte=oldest_allowed)

    def serialize_page(results):
        return serialize(results, request.user)

    return self.paginate(
        request=request,
        queryset=queryset,
        order_by='-datetime',
        on_results=serialize_page,
        paginator_cls=DateTimePaginator,
    )
def _get_events_legacy(self, request, group, environment, query, tags):
    """List a group's events from the legacy (ORM-backed) event store.

    ``query`` matches the message as a case-insensitive substring, or the
    event id exactly when the query looks like an event id. ``tags`` are
    resolved to an event filter through tagstore; an empty filter yields an
    empty response. Events older than the organization's retention window
    are excluded.
    """
    queryset = Event.objects.filter(group_id=group.id)

    if query:
        condition = Q(message__icontains=query)
        if is_event_id(query):
            condition |= Q(event_id__exact=query)
        queryset = queryset.filter(condition)

    if tags:
        # ``environment`` is optional; tagstore receives None when it is absent.
        environment_id = environment.id if environment is not None else None
        event_filter = tagstore.get_group_event_filter(
            group.project_id,
            group.id,
            environment_id,
            tags,
        )
        if not event_filter:
            return Response([])
        queryset = queryset.filter(**event_filter)

    # Drop events that fall outside the retention period.
    retention_days = quotas.get_event_retention(organization=group.project.organization)
    if retention_days:
        oldest_allowed = timezone.now() - timedelta(days=retention_days)
        queryset = queryset.filter(datetime__gte=oldest_allowed)

    def serialize_page(results):
        return serialize(results, request.user)

    return self.paginate(
        request=request,
        queryset=queryset,
        order_by='-datetime',
        on_results=serialize_page,
        paginator_cls=DateTimePaginator,
    )
def insert(self, group, event, is_new, is_sample, is_regression,
           is_new_group_environment, primary_hash, skip_consume=False):
    """Send an ``insert`` message for ``event``.

    If the raw event data carries tag keys listed in
    ``self.UNEXPECTED_TAG_KEYS``, they are logged as an error before the
    message is sent.
    """
    project = event.project
    retention_days = quotas.get_event_retention(organization=project.organization)

    event_data = event.get_raw_data()

    # ``tags`` may be missing or empty, in which case there is nothing to check.
    tag_pairs = get_path(event_data, 'tags', filter=True) or []
    unexpected_tags = {
        name for (name, _value) in tag_pairs if name in self.UNEXPECTED_TAG_KEYS
    }
    if unexpected_tags:
        logger.error('%r received unexpected tags: %r', self, unexpected_tags)

    extra_data = (
        {
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            # TODO(mitsuhiko): We do not want to send this incorrect
            # message but this is what snuba needs at the moment.
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': event_data,
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        },
        {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
            'skip_consume': skip_consume,
        },
    )
    self._send(project.id, 'insert', extra_data=extra_data)
def insert(self, group, event, is_new, is_sample, is_regression,
           is_new_group_environment, primary_hash, skip_consume=False):
    """Send an ``insert`` message describing ``event`` and its state flags."""
    project = event.project
    # The Organization is constructed from the id alone instead of being read
    # from ``project.organization``.
    organization = Organization(project.organization_id)
    retention_days = quotas.get_event_retention(organization=organization)

    event_payload = {
        'group_id': event.group_id,
        'event_id': event.event_id,
        'organization_id': project.organization_id,
        'project_id': event.project_id,
        'message': event.message,
        'platform': event.platform,
        'datetime': event.datetime,
        'data': dict(event.data.items()),
        'primary_hash': primary_hash,
        'retention_days': retention_days,
    }
    state_flags = {
        'is_new': is_new,
        'is_sample': is_sample,
        'is_regression': is_regression,
        'is_new_group_environment': is_new_group_environment,
    }
    self._send(project.id, 'insert', extra_data=(event_payload, state_flags))
def query(self, project, tags=None, environment=None, sort_by='date', limit=100,
          cursor=None, count_hits=False, paginator_options=None, **parameters):
    """Build the base group queryset for ``project`` and delegate to ``_query``.

    :param project: project whose groups are searched.
    :param tags: optional mapping of tag key to value. A ``sentry:release``
        value of ``'latest'`` is resolved to the latest release. The caller's
        mapping is never modified.
    :param environment: passed to the latest-release lookup and to ``_query``.
    :param sort_by, limit, cursor, count_hits, paginator_options: forwarded
        to ``_query`` unchanged (``paginator_options`` defaults to ``{}``).
    :param parameters: search parameters consumed by the queryset builder
        (``query``, ``status``, ``bookmarked_by``, ``assigned_to``,
        ``unassigned``, ``subscribed_by``, ``active_at_from``,
        ``active_at_to``); a ``first_release`` of ``'latest'`` is resolved
        like the release tag.
    :return: whatever ``_query`` returns, or an empty paginator result when
        ``'latest'`` cannot be resolved to a release.
    """
    from sentry.models import Group, GroupStatus, GroupSubscription, Release

    if paginator_options is None:
        paginator_options = {}

    # Work on a private copy: resolving 'latest' below rewrites an entry, and
    # that change must not leak into the mapping owned by the caller.
    tags = {} if tags is None else dict(tags)

    try:
        if tags.get('sentry:release') == 'latest':
            tags['sentry:release'] = get_latest_release(project, environment)

        if parameters.get('first_release') == 'latest':
            parameters['first_release'] = get_latest_release(project, environment)
    except Release.DoesNotExist:
        # no matches could possibly be found from this point on
        return Paginator(Group.objects.none()).get_result()

    group_queryset = QuerySetBuilder({
        'query': CallbackCondition(
            lambda queryset, query: queryset.filter(
                Q(message__icontains=query) | Q(culprit__icontains=query),
            ) if query else queryset,
        ),
        'status': CallbackCondition(
            lambda queryset, status: queryset.filter(status=status),
        ),
        'bookmarked_by': CallbackCondition(
            lambda queryset, user: queryset.filter(
                bookmark_set__project=project,
                bookmark_set__user=user,
            ),
        ),
        'assigned_to': CallbackCondition(
            functools.partial(assigned_to_filter, project=project),
        ),
        'unassigned': CallbackCondition(
            lambda queryset, unassigned: queryset.filter(
                assignee_set__isnull=unassigned,
            ),
        ),
        'subscribed_by': CallbackCondition(
            lambda queryset, user: queryset.filter(
                id__in=GroupSubscription.objects.filter(
                    project=project,
                    user=user,
                    is_active=True,
                ).values_list('group'),
            ),
        ),
        'active_at_from': ScalarCondition('active_at', 'gt'),
        'active_at_to': ScalarCondition('active_at', 'lt'),
    }).build(
        # Groups that are being deleted or merged are never searchable.
        Group.objects.filter(project=project).exclude(status__in=[
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        ]),
        parameters,
    )

    # filter out groups which are beyond the retention period
    retention = quotas.get_event_retention(organization=project.organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
    else:
        retention_window_start = None

    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    if retention_window_start:
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

    # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
    # seemed better to handle all the shared initialization and then handoff to the
    # actual backend.
    return self._query(project, retention_window_start, group_queryset, tags,
                       environment, sort_by, limit, cursor, count_hits,
                       paginator_options, **parameters)
def query(
    self,
    projects,
    environments=None,
    sort_by="date",
    limit=100,
    cursor=None,
    count_hits=False,
    paginator_options=None,
    search_filters=None,
    date_from=None,
    date_to=None,
):
    """Build the base group queryset for ``projects`` and run the search.

    The queryset is restricted to the given projects (excluding groups that
    are pending deletion, being deleted, or pending merge), then narrowed by
    the supported ``search_filters``, by the organization's retention window
    and, when ``environments`` is given, by those environments. The actual
    search is delegated to ``PostgresSnubaQueryExecutor().query``.

    :param projects: projects to search; all must share one organization.
    :param environments: optional environments; ``None`` means "any".
    :param search_filters: filters passed to the queryset builders
        (defaults to an empty list).
    :param sort_by, limit, cursor, count_hits, paginator_options, date_from,
        date_to: forwarded to the query executor unchanged
        (``paginator_options`` defaults to ``{}``).
    :raises RuntimeError: if the projects do not all share exactly one
        organization id.
    :return: the result of the query executor.
    """
    from sentry.models import Group, GroupStatus, GroupSubscription

    search_filters = search_filters if search_filters is not None else []

    # ensure projects are from same org
    if len({p.organization_id for p in projects}) != 1:
        raise RuntimeError("Cross organization search not supported")

    if paginator_options is None:
        paginator_options = {}

    group_queryset = Group.objects.filter(project__in=projects).exclude(
        status__in=[
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        ])

    # Conditions that do not depend on whether environments were supplied.
    qs_builder_conditions = {
        "status": QCallbackCondition(lambda status: Q(status=status)),
        "bookmarked_by": QCallbackCondition(lambda user: Q(
            bookmark_set__project__in=projects, bookmark_set__user=user)),
        "assigned_to": QCallbackCondition(
            functools.partial(assigned_to_filter, projects=projects)),
        "unassigned": QCallbackCondition(
            functools.partial(unassigned_filter, projects=projects)),
        "subscribed_by": QCallbackCondition(
            lambda user: Q(id__in=GroupSubscription.objects.filter(
                project__in=projects, user=user, is_active=True).
                values_list("group"))),
        "active_at": ScalarCondition("active_at"),
    }

    group_queryset = QuerySetBuilder(qs_builder_conditions).build(
        group_queryset, search_filters)

    # filter out groups which are beyond the retention period
    # (all projects share one organization, so the first one is used)
    retention = quotas.get_event_retention(
        organization=projects[0].organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
    else:
        retention_window_start = None
    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    if retention_window_start:
        group_queryset = group_queryset.filter(
            last_seen__gte=retention_window_start)

    # TODO: It's possible `first_release` could be handled by Snuba.
    if environments is not None:
        environment_ids = [environment.id for environment in environments]
        group_queryset = group_queryset.filter(
            groupenvironment__environment_id__in=environment_ids)
        group_queryset = QuerySetBuilder({
            "first_release": QCallbackCondition(lambda version: Q(
                # if environment(s) are selected, we just filter on the group
                # environment's first_release attribute.
                groupenvironment__first_release__organization_id=projects[
                    0].organization_id,
                groupenvironment__first_release__version=version,
                groupenvironment__environment_id__in=environment_ids,
            )),
            "first_seen": ScalarCondition(
                "groupenvironment__first_seen",
                {"groupenvironment__environment_id__in": environment_ids},
            ),
        }).build(group_queryset, search_filters)
    else:
        group_queryset = QuerySetBuilder({
            "first_release": QCallbackCondition(lambda release_version: Q(
                # if no specific environments are supplied, we either choose any
                # groups/issues whose first release matches the given release_version,
                Q(first_release_id__in=Release.objects.filter(
                    version=release_version,
                    organization_id=projects[0].organization_id,
                )) |
                # or we choose any groups whose first occurrence in any environment and the latest release at
                # the time of the groups' first occurrence matches the given
                # release_version
                Q(id__in=GroupEnvironment.objects.filter(
                    first_release__version=release_version,
                    first_release__organization_id=projects[0].
                    organization_id,
                    environment__organization_id=projects[
                        0].organization_id,
                ).values_list("group_id")))),
            "first_seen": ScalarCondition("first_seen"),
        }).build(group_queryset, search_filters)

    # Hand the prepared queryset to the executor that performs the search.
    query_executor = PostgresSnubaQueryExecutor()

    return query_executor.query(
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        date_from,
        date_to,
    )
def get_event_stats_data(
    self,
    request: Request,
    organization: Organization,
    get_event_stats: Callable[
        [Sequence[str], str, Dict[str, str], int, bool, Optional[timedelta]], SnubaTSResult
    ],
    top_events: int = 0,
    query_column: str = "count()",
    params: Optional[Dict[str, Any]] = None,
    query: Optional[str] = None,
    allow_partial_buckets: bool = False,
    zerofill_results: bool = True,
    comparison_delta: Optional[timedelta] = None,
) -> Dict[str, Any]:
    """Run an events-stats query and serialize its time series.

    Columns come from the ``yAxis`` GET parameter (falling back to
    ``query_column``), ``query`` from the ``query`` GET parameter when not
    given, and ``params`` from ``self.get_snuba_params`` when not given. The
    ``get_event_stats`` callback performs the query and receives
    ``(query_columns, query, params, rollup, zerofill_results,
    comparison_delta)``.

    :return: the serialized result; ``{"data": []}`` when there are no
        projects. For a top-events request whose result is a dict, a dict of
        serialized results keyed like the result.
    :raises ValidationError: if ``comparison_delta`` moves the start of the
        period before the organization's retention window.
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a flattened copy of this function — confirm against the original.
    with self.handle_query_errors():
        with sentry_sdk.start_span(
            op="discover.endpoint", description="base.stats_query_creation"
        ):
            columns = request.GET.getlist("yAxis", [query_column])
            if query is None:
                query = request.GET.get("query")
            if params is None:
                try:
                    # events-stats is still used by events v1 which doesn't require global views
                    params = self.get_snuba_params(
                        request, organization, check_global_views=False
                    )
                except NoProjects:
                    return {"data": []}

            try:
                rollup = get_rollup_from_request(
                    request,
                    params,
                    default_interval=None,
                    error=InvalidSearchQuery(),
                    top_events=top_events,
                )
            # If the user sends an invalid interval, use the default instead
            except InvalidSearchQuery:
                sentry_sdk.set_tag("user.invalid_interval", request.GET.get("interval"))
                date_range = params["end"] - params["start"]
                stats_period = parse_stats_period(get_interval_from_range(date_range, False))
                # Falls back to 3600 seconds when no stats period could be parsed.
                rollup = int(stats_period.total_seconds()) if stats_period is not None else 3600

            if comparison_delta is not None:
                # The comparison period must not start before the retention window.
                retention = quotas.get_event_retention(organization=organization)
                comparison_start = params["start"] - comparison_delta
                if retention and comparison_start < timezone.now() - timedelta(days=retention):
                    raise ValidationError("Comparison period is outside your retention window")

            # Backwards compatibility for incidents which uses the old
            # column aliases as it straddles both versions of events/discover.
            # We will need these aliases until discover2 flags are enabled for all
            # users.
            # We need these rollup columns to generate correct events-stats results
            column_map = {
                "user_count": "count_unique(user)",
                "event_count": "count()",
                "epm()": "epm(%d)" % rollup,
                "eps()": "eps(%d)" % rollup,
                "tpm()": "tpm(%d)" % rollup,
                "tps()": "tps(%d)" % rollup,
            }

            # Columns without an alias in the map are passed through unchanged.
            query_columns = [column_map.get(column, column) for column in columns]
        with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_query"):
            result = get_event_stats(
                query_columns, query, params, rollup, zerofill_results, comparison_delta
            )

    serializer = SnubaTSResultSerializer(organization, None, request.user)

    with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_serialization"):
        # When the request is for top_events, result can be a SnubaTSResult in the event that
        # there were no top events found. In this case, result contains a zerofilled series
        # that acts as a placeholder.
        is_multiple_axis = len(query_columns) > 1
        if top_events > 0 and isinstance(result, dict):
            results = {}
            for key, event_result in result.items():
                if is_multiple_axis:
                    results[key] = self.serialize_multiple_axis(
                        serializer,
                        event_result,
                        columns,
                        query_columns,
                        allow_partial_buckets,
                        zerofill_results=zerofill_results,
                    )
                else:
                    # Need to get function alias if count is a field, but not the axis
                    results[key] = serializer.serialize(
                        event_result,
                        column=resolve_axis_column(query_columns[0]),
                        allow_partial_buckets=allow_partial_buckets,
                        zerofill_results=zerofill_results,
                    )
            serialized_result = results
        elif is_multiple_axis:
            serialized_result = self.serialize_multiple_axis(
                serializer,
                result,
                columns,
                query_columns,
                allow_partial_buckets,
                zerofill_results=zerofill_results,
            )
        else:
            # The comparison column is only requested on this single-axis path.
            extra_columns = None
            if comparison_delta:
                extra_columns = ["comparisonCount"]
            serialized_result = serializer.serialize(
                result,
                resolve_axis_column(query_columns[0]),
                allow_partial_buckets=allow_partial_buckets,
                zerofill_results=zerofill_results,
                extra_columns=extra_columns,
            )

        return serialized_result
def get_project_config(project, full_config=True, project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param full_config: True if the full config is required, False if only
        the restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param project_keys: Pre-fetched project keys for performance. However, if
        no project keys are provided it is assumed that the config does not
        need to contain auth information (this is the case when used in
        python's StoreView)
    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    # A project that is not visible only gets a config marked as disabled.
    if project.status != ObjectStatus.VISIBLE:
        return ProjectConfig(project, disabled=True)

    public_keys = get_public_key_configs(project, full_config, project_keys=project_keys)

    with Hub.current.start_span(op="get_public_config"):
        fetched_at = datetime.utcnow().replace(tzinfo=utc)
        cfg = {
            "disabled": False,
            "slug": project.slug,
            "lastFetch": fetched_at,
            "lastChange": project.get_option("sentry:relay-rev-lastchange", fetched_at),
            "rev": project.get_option("sentry:relay-rev", uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains": list(get_origins(project)),
                "trustedRelays": [
                    relay["public_key"]
                    for relay in project.organization.get_option("sentry:trusted-relays", [])
                    if relay
                ],
                "piiConfig": get_pii_config(project),
                "datascrubbingSettings": get_datascrubbing_settings(project),
            },
            "organizationId": project.organization_id,
            "projectId": project.id,  # XXX: Unused by Relay, required by Python store
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # Each remaining section is loaded inside its own span:
    # (span op, key under cfg["config"], loader).
    full_only_sections = (
        ("get_filter_settings", "filterSettings",
         lambda: get_filter_settings(project)),
        ("get_grouping_config_dict_for_project", "groupingConfig",
         lambda: get_grouping_config_dict_for_project(project)),
        ("get_event_retention", "eventRetention",
         lambda: quotas.get_event_retention(project.organization)),
        ("get_all_quotas", "quotas",
         lambda: get_quotas(project, keys=project_keys)),
    )
    for span_op, config_key, load_section in full_only_sections:
        with Hub.current.start_span(op=span_op):
            cfg["config"][config_key] = load_section()

    return ProjectConfig(project, **cfg)
def _prepare_query_params(query_params):
    """
    Convert ``query_params`` into the keyword arguments for a Snuba request.

    The time range is made naive-UTC, clamped to the organization's event
    retention and then passed through ``shrink_time_window``. The translated
    ``filter_keys`` are appended to ``query_params.conditions`` and
    ``query_params.kwargs`` is updated in place.

    :param query_params: object exposing ``start``, ``end``, ``dataset``,
        ``filter_keys``, ``is_grouprelease``, ``conditions``, ``groupby``,
        ``aggregations``, ``rollup`` and ``kwargs``.
    :return: ``(kwargs, forward, reverse)`` where ``kwargs`` holds every
        non-``None`` entry of ``query_params.kwargs`` overlaid with
        ``OVERRIDE_OPTIONS`` and ``forward``/``reverse`` are the translators
        returned by ``get_snuba_translators``.
    :raises UnqualifiedQueryError: if the dataset has no known strategy for
        resolving an organization.
    :raises QueryOutsideRetentionError: if the retention-clamped start is
        after ``end``.
    :raises QueryOutsideGroupActivityError: if ``start`` is still after
        ``end`` once the shrunken window has been considered.
    """
    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(query_params.start)
    end = naiveify_datetime(query_params.end)

    with timer("get_snuba_map"):
        forward, reverse = get_snuba_translators(
            query_params.filter_keys, is_grouprelease=query_params.is_grouprelease
        )

    dataset = query_params.dataset
    if dataset in (Dataset.Events, Dataset.Discover, Dataset.Sessions):
        organization_id, params_to_update = get_query_params_to_update_for_projects(
            query_params, with_org=(dataset == Dataset.Sessions)
        )
    elif dataset in (Dataset.Outcomes, Dataset.OutcomesRaw):
        organization_id, params_to_update = get_query_params_to_update_for_organizations(
            query_params
        )
    else:
        raise UnqualifiedQueryError(
            "No strategy found for getting an organization for the given dataset."
        )

    query_params.kwargs.update(params_to_update)

    # Translate on a deep copy so the caller's filter_keys stay untouched.
    translated_keys = forward(deepcopy(query_params.filter_keys))
    for column, values in six.iteritems(translated_keys):
        if not values:
            continue
        only_null = len(values) == 1 and None in values
        condition = (column, "IS NULL", None) if only_null else (column, "IN", values)
        query_params.conditions.append(condition)

    retention = quotas.get_event_retention(organization=Organization(organization_id))
    if retention:
        retention_start = datetime.utcnow() - timedelta(days=retention)
        if retention_start > start:
            start = retention_start
        if start > end:
            raise QueryOutsideRetentionError

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    shrunk_start = shrink_time_window(query_params.filter_keys.get("group_id"), start)

    # TODO (alexh) this is a quick emergency fix for an occasion where a search
    # results in only 1 django candidate, which is then passed to snuba to
    # check and we raised because of it. Remove this once we figure out why the
    # candidate was returned from django at all if it existed only outside the
    # time range of the query
    if shrunk_start <= end:
        start = shrunk_start

    if start > end:
        raise QueryOutsideGroupActivityError

    query_params.kwargs.update(
        {
            "dataset": query_params.dataset.value,
            "from_date": start.isoformat(),
            "to_date": end.isoformat(),
            "groupby": query_params.groupby,
            "conditions": query_params.conditions,
            "aggregations": query_params.aggregations,
            "granularity": query_params.rollup,  # TODO name these things the same
        }
    )

    # Entries that are None are dropped before the overrides are applied.
    kwargs = {}
    for name, value in six.iteritems(query_params.kwargs):
        if value is not None:
            kwargs[name] = value
    kwargs.update(OVERRIDE_OPTIONS)

    return kwargs, forward, reverse
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, arrayjoin=None, limit=None, offset=None,
              orderby=None, having=None, referrer=None, is_grouprelease=False,
              selected_columns=None, totals=None, limitby=None):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar). The list passed in is not modified.

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query.

    Returns the decoded JSON response body, with each row of ``body['data']``
    passed through the reverse translator.

    Raises ``SnubaError`` when no project can be determined, when the HTTP
    request fails, when the response is not valid JSON or when the status is
    not 200; ``QueryOutsideRetentionError`` when the range lies entirely before
    the retention window; ``QueryOutsideGroupActivityError`` when
    ``shrink_time_window`` leaves an empty range.
    """
    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(start)
    end = naiveify_datetime(end)

    groupby = groupby or []
    # Copy instead of aliasing: the translated filter_keys are appended below
    # and must not leak into the list owned by the caller.
    conditions = list(conditions) if conditions else []
    having = having or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}
    selected_columns = selected_columns or []

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly. Normalised
        # to a list because it is indexed and JSON-serialised further down.
        project_ids = list(filter_keys['project_id'])
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(filter_keys.copy())):
        if keys:
            # `None in keys` rather than `keys[0] is None` so that
            # non-indexable collections (e.g. sets) are handled as well.
            if len(keys) == 1 and None in keys:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise SnubaError("No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    use_group_id_column = options.get('snuba.use_group_id_column')
    issues = None
    if not use_group_id_column:
        # If the grouping, aggregation, or any of the conditions reference `issue`
        # we need to fetch the issue definitions (issue -> fingerprint hashes)
        aggregate_cols = [a[1] for a in aggregations]
        condition_cols = all_referenced_columns(conditions)
        all_cols = groupby + aggregate_cols + condition_cols + selected_columns
        get_issues = 'issue' in all_cols

        if get_issues:
            with timer('get_project_issues'):
                issues = get_project_issues(project_ids, filter_keys.get('issue'))

    start, end = shrink_time_window(filter_keys.get('issue'), start, end)

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    if start > end:
        raise QueryOutsideGroupActivityError

    # Parameters that are None are left out of the request entirely.
    request = {k: v for k, v in six.iteritems({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'conditions': conditions,
        'having': having,
        'groupby': groupby,
        'totals': totals,
        'project': project_ids,
        'aggregations': aggregations,
        'granularity': rollup,
        'use_group_id_column': use_group_id_column,
        'issues': issues,
        'arrayjoin': arrayjoin,
        'limit': limit,
        'offset': offset,
        'limitby': limitby,
        'orderby': orderby,
        'selected_columns': selected_columns,
    }) if v is not None}

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise SnubaError(u"Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            raise SnubaError(body['error'])
        else:
            raise SnubaError(u'HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
def query(self, projects, tags=None, environments=None, sort_by='date', limit=100,
          cursor=None, count_hits=False, paginator_options=None, search_filters=None,
          use_new_filters=False, **parameters):
    """
    Build the base ``Group`` queryset shared by the search backends and hand
    off to ``self._query``.

    Groups that are pending deletion, being deleted or pending merge are
    always excluded. The remaining conditions come from ``search_filters``
    when ``use_new_filters`` is true and from ``parameters`` otherwise. Groups
    last seen before the organization's retention window are filtered out.

    Raises ``RuntimeError`` unless all ``projects`` share exactly one
    organization. Returns whatever ``self._query`` returns.
    """
    from sentry.models import Group, GroupStatus, GroupSubscription

    if search_filters is None:
        search_filters = []

    # ensure projects are from same org
    organization_ids = {p.organization_id for p in projects}
    if len(organization_ids) != 1:
        raise RuntimeError('Cross organization search not supported')

    paginator_options = {} if paginator_options is None else paginator_options
    tags = {} if tags is None else tags

    base_queryset = Group.objects.filter(project__in=projects).exclude(status__in=[
        GroupStatus.PENDING_DELETION,
        GroupStatus.DELETION_IN_PROGRESS,
        GroupStatus.PENDING_MERGE,
    ])

    builder_cls = SearchFilterQuerySetBuilder if use_new_filters else NewQuerySetBuilder
    builder_input = search_filters if use_new_filters else parameters

    def match_message_or_culprit(query):
        # Free-text search matches either the message or the culprit.
        return Q(
            Q(message__icontains=query) | Q(culprit__icontains=query),
        )

    def bookmarked_by(user):
        return Q(
            bookmark_set__project__in=projects,
            bookmark_set__user=user,
        )

    def subscribed_by(user):
        subscriptions = GroupSubscription.objects.filter(
            project__in=projects,
            user=user,
            is_active=True,
        )
        return Q(id__in=subscriptions.values_list('group'))

    group_queryset = builder_cls({
        'message': QCallbackCondition(match_message_or_culprit, skip_if_falsey=True),
        # TODO: Remove this once we've stopped using old params
        'query': QCallbackCondition(match_message_or_culprit, skip_if_falsey=True),
        'status': QCallbackCondition(lambda status: Q(status=status)),
        'bookmarked_by': QCallbackCondition(bookmarked_by),
        'assigned_to': QCallbackCondition(
            functools.partial(assigned_to_filter, projects=projects),
        ),
        'unassigned': QCallbackCondition(
            functools.partial(unassigned_filter, projects=projects),
        ),
        'subscribed_by': QCallbackCondition(subscribed_by),
        'active_at': SearchFilterScalarCondition('active_at'),
        # TODO: These are legacy params. Once we've moved to SearchFilter
        # entirely then they can be removed, since the `'active_at'`
        # condition will handle both
        'active_at_from': ScalarCondition('active_at', 'gt'),
        'active_at_to': ScalarCondition('active_at', 'lt'),
    }).build(base_queryset, builder_input)

    # filter out groups which are beyond the retention period
    retention = quotas.get_event_retention(organization=projects[0].organization)
    retention_window_start = (
        timezone.now() - timedelta(days=retention) if retention else None
    )

    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    if retention_window_start:
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

    # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
    # seemed better to handle all the shared initialization and then handoff to the
    # actual backend.
    return self._query(
        projects, retention_window_start, group_queryset, tags, environments,
        sort_by, limit, cursor, count_hits, paginator_options, search_filters,
        use_new_filters, **parameters
    )
def _build_queryset(
    self,
    project,
    query=None,
    status=None,
    tags=None,
    bookmarked_by=None,
    assigned_to=None,
    first_release=None,
    sort_by='date',
    unassigned=None,
    subscribed_by=None,
    age_from=None,
    age_from_inclusive=True,
    age_to=None,
    age_to_inclusive=True,
    last_seen_from=None,
    last_seen_from_inclusive=True,
    last_seen_to=None,
    last_seen_to_inclusive=True,
    date_from=None,
    date_from_inclusive=True,
    date_to=None,
    date_to_inclusive=True,
    active_at_from=None,
    active_at_from_inclusive=True,
    active_at_to=None,
    active_at_to_inclusive=True,
    times_seen=None,
    times_seen_lower=None,
    times_seen_lower_inclusive=True,
    times_seen_upper=None,
    times_seen_upper_inclusive=True,
    cursor=None,
    limit=None,
    environment=None,
):
    """
    Build the ``Group`` queryset for a search within ``project``.

    Every supplied criterion narrows the queryset:

    - ``query``: case-insensitive match on message or culprit.
    - ``status``: exact status; when ``None`` groups pending deletion, being
      deleted or pending merge are excluded instead.
    - ``bookmarked_by`` / ``assigned_to`` / ``unassigned`` /
      ``subscribed_by``: user related filters.
    - ``first_release``: release version; the ``EMPTY`` marker yields an
      empty queryset.
    - ``tags`` / ``environment``: resolved through ``tagstore``. When an
      environment is given its name replaces any ``environment`` tag. The
      dict passed in is not modified.
    - ``age_*`` (``first_seen``), ``last_seen_*``, ``active_at_*``,
      ``times_seen*`` and ``date_*`` (event ``datetime``): range bounds, each
      with an ``*_inclusive`` flag selecting ``gte``/``lte`` over ``gt``/``lt``.

    Groups last seen before the organization's retention window are removed
    and a ``sort_value`` column is selected using the sort clause for
    ``sort_by`` on the current database engine. ``cursor`` and ``limit`` are
    accepted but not used by this method.

    Returns the resulting queryset.
    """
    from sentry.models import Event, Group, GroupSubscription, GroupStatus, OrganizationMember

    # Work on a copy: the environment name may be written into `tags` below
    # and that must not be visible through the caller's dict.
    tags = dict(tags) if tags else {}

    def range_lookups(field, lower, lower_inclusive, upper, upper_inclusive, is_set=bool):
        # Build the ``field__gt[e]`` / ``field__lt[e]`` lookups for one range.
        lookups = {}
        if is_set(lower):
            lookups[field + ('__gte' if lower_inclusive else '__gt')] = lower
        if is_set(upper):
            lookups[field + ('__lte' if upper_inclusive else '__lt')] = upper
        return lookups

    engine = get_db_engine('default')

    queryset = Group.objects.filter(project=project)

    if query:
        # TODO(dcramer): if we want to continue to support search on SQL
        # we should at least optimize this in Postgres so that it does
        # the query filter **after** the index filters, and restricts the
        # result set
        queryset = queryset.filter(
            Q(message__icontains=query) | Q(culprit__icontains=query))

    if status is None:
        status_in = (
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        )
        queryset = queryset.exclude(status__in=status_in)
    else:
        queryset = queryset.filter(status=status)

    if bookmarked_by:
        queryset = queryset.filter(
            bookmark_set__project=project,
            bookmark_set__user=bookmarked_by,
        )

    if assigned_to:
        # Issues assigned to one of the user's teams count as assigned to
        # the user; a user without a membership simply has no teams.
        teams = []
        try:
            member = OrganizationMember.objects.get(
                user=assigned_to,
                organization_id=project.organization_id,
            )
        except OrganizationMember.DoesNotExist:
            pass
        else:
            teams = member.get_teams()

        queryset = queryset.filter(
            Q(assignee_set__user=assigned_to, assignee_set__project=project) |
            Q(assignee_set__team__in=teams))
    elif unassigned in (True, False):
        queryset = queryset.filter(assignee_set__isnull=unassigned, )

    if subscribed_by is not None:
        queryset = queryset.filter(
            id__in=GroupSubscription.objects.filter(
                project=project,
                user=subscribed_by,
                is_active=True,
            ).values_list('group'),
        )

    if first_release:
        if first_release is EMPTY:
            return queryset.none()
        queryset = queryset.filter(
            first_release__organization_id=project.organization_id,
            first_release__version=first_release,
        )

    if environment is not None:
        # XXX: This overwrites the ``environment`` tag, if present, to
        # ensure that the result set is limited to groups that have been
        # seen in this environment (there is no way to search for groups
        # that match multiple values of a single tag without changes to the
        # tagstore API.)
        tags['environment'] = environment.name

    if tags:
        matches = tagstore.get_group_ids_for_search_filter(
            project.id,
            environment.id if environment is not None else None,
            tags,
        )
        if not matches:
            return queryset.none()
        queryset = queryset.filter(id__in=matches, )

    # Datetime ranges on the group itself; a bound counts as given when truthy.
    for lookups in (
        range_lookups('first_seen', age_from, age_from_inclusive,
                      age_to, age_to_inclusive),
        range_lookups('last_seen', last_seen_from, last_seen_from_inclusive,
                      last_seen_to, last_seen_to_inclusive),
        range_lookups('active_at', active_at_from, active_at_from_inclusive,
                      active_at_to, active_at_to_inclusive),
    ):
        if lookups:
            queryset = queryset.filter(**lookups)

    if times_seen is not None:
        queryset = queryset.filter(times_seen=times_seen)

    # 0 is a meaningful count, so these bounds are tested against None.
    times_seen_lookups = range_lookups(
        'times_seen', times_seen_lower, times_seen_lower_inclusive,
        times_seen_upper, times_seen_upper_inclusive,
        is_set=lambda value: value is not None,
    )
    if times_seen_lookups:
        queryset = queryset.filter(**times_seen_lookups)

    event_lookups = range_lookups(
        'datetime', date_from, date_from_inclusive, date_to, date_to_inclusive)
    if event_lookups:
        event_lookups['project_id'] = project.id
        event_queryset = Event.objects.filter(**event_lookups)

        if query:
            event_queryset = event_queryset.filter(message__icontains=query)

        # limit to the first 1000 results
        group_ids = event_queryset.distinct().values_list('group_id', flat=True)[:1000]

        # if Event is not on the primary database remove Django's
        # implicit subquery by coercing to a list
        base = router.db_for_read(Group)
        using = router.db_for_read(Event)
        # MySQL also cannot do a LIMIT inside of a subquery
        if base != using or engine.startswith('mysql'):
            group_ids = list(group_ids)

        queryset = queryset.filter(id__in=group_ids, )

    if engine.startswith('sqlite'):
        score_clause = SQLITE_SORT_CLAUSES[sort_by]
    elif engine.startswith('mysql'):
        score_clause = MYSQL_SORT_CLAUSES[sort_by]
    elif engine.startswith('oracle'):
        score_clause = ORACLE_SORT_CLAUSES[sort_by]
    elif engine in MSSQL_ENGINES:
        score_clause = MSSQL_SORT_CLAUSES[sort_by]
    else:
        score_clause = SORT_CLAUSES[sort_by]

    # filter out groups which are beyond the retention period
    retention = quotas.get_event_retention(organization=project.organization)
    if retention:
        queryset = queryset.filter(last_seen__gte=timezone.now() - timedelta(days=retention))

    queryset = queryset.extra(select={'sort_value': score_clause}, )
    return queryset
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, arrayjoin=None, limit=None, offset=None,
              orderby=None, having=None, referrer=None, is_grouprelease=False,
              selected_columns=None, totals=None, limitby=None, turbo=False):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar). The list passed in is not modified.

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query.

    Returns the decoded JSON response body, with each row of ``body['data']``
    passed through the reverse translator.

    Raises ``UnqualifiedQueryError`` when no project can be determined,
    ``QueryOutsideRetentionError`` / ``QueryOutsideGroupActivityError`` when
    the time range ends up empty, ``UnexpectedResponseError`` when the response
    is not valid JSON, and for non-200 responses ``RateLimitExceeded``,
    ``SchemaValidationError``, a class from ``clickhouse_error_codes_map``
    (``QueryExecutionError`` by default) or plain ``SnubaError`` depending on
    the status and the error payload.
    """
    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(start)
    end = naiveify_datetime(end)

    groupby = groupby or []
    # Copy instead of aliasing: the translated filter_keys are appended below
    # and must not leak into the list owned by the caller.
    conditions = list(conditions) if conditions else []
    having = having or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}
    selected_columns = selected_columns or []

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = list(set(filter_keys['project_id']))
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(filter_keys.copy())):
        if keys:
            if len(keys) == 1 and None in keys:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise UnqualifiedQueryError(
            "No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    start = shrink_time_window(filter_keys.get('issue'), start)

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    if start > end:
        raise QueryOutsideGroupActivityError

    # Parameters that are None are left out of the request entirely.
    request = {k: v for k, v in six.iteritems({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'conditions': conditions,
        'having': having,
        'groupby': groupby,
        'totals': totals,
        'project': project_ids,
        'aggregations': aggregations,
        'granularity': rollup,
        'arrayjoin': arrayjoin,
        'limit': limit,
        'offset': offset,
        'limitby': limitby,
        'orderby': orderby,
        'selected_columns': selected_columns,
        'turbo': turbo
    }) if v is not None}

    request.update(OVERRIDE_OPTIONS)

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise UnexpectedResponseError(
            u"Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            error = body['error']
            if response.status == 429:
                raise RateLimitExceeded(error['message'])
            elif error['type'] == 'schema':
                raise SchemaValidationError(error['message'])
            elif error['type'] == 'clickhouse':
                raise clickhouse_error_codes_map.get(
                    error['code'],
                    QueryExecutionError,
                )(error['message'])
            else:
                raise SnubaError(error['message'])
        else:
            raise SnubaError(u'HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
def query(self, project, tags=None, environment=None, sort_by='date', limit=100,
          cursor=None, count_hits=False, paginator_options=None, **parameters):
    """
    Search the groups of ``project`` and return one page of results.

    ``tags`` maps tag keys to the values to match; the dict passed in is not
    modified. A ``sentry:release`` tag or ``first_release`` parameter equal to
    ``'latest'`` is replaced with the result of ``get_latest_release``; if
    that raises ``Release.DoesNotExist`` an empty result is returned.

    ``parameters`` carries the remaining search criteria (``query``,
    ``status``, ``bookmarked_by``, ``assigned_to``, ``unassigned``,
    ``subscribed_by``, ``active_at_*``, ``date_*``, ``first_release``,
    ``age_*``, ``last_seen_*``, ``times_seen*``).

    With an ``environment`` the group level criteria only pre-filter the
    candidates; the final filtering and the sort value come from the
    group tag value queryset for that environment and the page is produced
    by a ``SequencePaginator``. Without one the criteria are applied to the
    group queryset directly and the paginator class registered for
    ``sort_by`` is used.

    ``limit``, ``cursor`` and ``count_hits`` are passed to the paginator's
    ``get_result``; ``paginator_options`` are passed to its constructor.
    """
    from sentry.models import (Environment, Event, Group, GroupEnvironment,
                               GroupStatus, GroupSubscription, Release)

    if paginator_options is None:
        paginator_options = {}

    # Work on a copy: entries are rewritten ('latest' release) and removed
    # ('environment') below, which must not show through the caller's dict.
    tags = dict(tags) if tags else {}

    try:
        if tags.get('sentry:release') == 'latest':
            tags['sentry:release'] = get_latest_release(project, environment)

        if parameters.get('first_release') == 'latest':
            parameters['first_release'] = get_latest_release(project, environment)
    except Release.DoesNotExist:
        # no matches could possibly be found from this point on
        return Paginator(Group.objects.none()).get_result()

    group_queryset = QuerySetBuilder({
        'query': CallbackCondition(
            lambda queryset, query: queryset.filter(
                Q(message__icontains=query) | Q(culprit__icontains=query),
            ) if query else queryset,
        ),
        'status': CallbackCondition(
            lambda queryset, status: queryset.filter(status=status),
        ),
        'bookmarked_by': CallbackCondition(
            lambda queryset, user: queryset.filter(
                bookmark_set__project=project,
                bookmark_set__user=user,
            ),
        ),
        'assigned_to': CallbackCondition(
            functools.partial(assigned_to_filter, project=project),
        ),
        'unassigned': CallbackCondition(
            lambda queryset, unassigned: queryset.filter(
                assignee_set__isnull=unassigned,
            ),
        ),
        'subscribed_by': CallbackCondition(
            lambda queryset, user: queryset.filter(
                id__in=GroupSubscription.objects.filter(
                    project=project,
                    user=user,
                    is_active=True,
                ).values_list('group'),
            ),
        ),
        'active_at_from': ScalarCondition('active_at', 'gt'),
        'active_at_to': ScalarCondition('active_at', 'lt'),
    }).build(
        Group.objects.filter(project=project).exclude(status__in=[
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        ]),
        parameters,
    )

    # filter out groups which are beyond the retention period
    retention = quotas.get_event_retention(organization=project.organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)
    else:
        retention_window_start = None

    if environment is not None:
        if 'environment' in tags:
            # TODO: This should probably just overwrite the existing tag,
            # rather than asserting on it, but...?
            # The pop happens outside the assert so that the tag is still
            # removed when assertions are stripped (``python -O``).
            environment_name = tags.pop('environment')
            assert Environment.objects.get(
                projects=project,
                name=environment_name,
            ).id == environment.id

        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('date_added', 'gt'),
            'date_to': ScalarCondition('date_added', 'lt'),
        })
        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            event_queryset = event_queryset_builder.build(
                tagstore.get_event_tag_qs(
                    project.id,
                    environment.id,
                    'environment',
                    environment.name,
                ),
                parameters,
            )
            if retention_window_start is not None:
                event_queryset = event_queryset.filter(
                    date_added__gte=retention_window_start)

            # Only the first 1000 distinct group ids are considered.
            group_queryset = group_queryset.filter(
                id__in=list(
                    event_queryset.distinct().values_list('group_id', flat=True)[:1000]
                )
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.extra(
                    where=[
                        '{} = {}'.format(
                            get_sql_column(GroupEnvironment, 'first_release_id'),
                            get_sql_column(Release, 'id'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'organization'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'version'),
                        ),
                    ],
                    params=[project.organization_id, version],
                    tables=[Release._meta.db_table],
                ),
            ),
            'times_seen': CallbackCondition(
                # This condition represents the exact number of times that
                # an issue has been seen in an environment. Since an issue
                # can't be seen in an environment more times than the issue
                # was seen overall, we can safely exclude any groups that
                # don't have at least that many events.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            'times_seen_lower': CallbackCondition(
                # This condition represents the lower threshold for the
                # number of times an issue has been seen in an environment.
                # Since an issue can't be seen in an environment more times
                # than the issue was seen overall, we can safely exclude
                # any groups that haven't met that threshold.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            # The following conditions make a few assertions that are
            # correct in an abstract sense but may not accurately reflect
            # the existing implementation (see GH-5289). These assumptions
            # are that 1. The first seen time for a Group is the minimum
            # value of the first seen time for all of its GroupEnvironment
            # relations; 2. The last seen time for a Group is the maximum
            # value of the last seen time for all of its GroupEnvironment
            # relations; 3. The first seen time is always less than or
            # equal to the last seen time.
            'age_from': CallbackCondition(
                # This condition represents the lower threshold for "first
                # seen" time for an environment. Due to assertions #1 and
                # #3, we can exclude any groups where the "last seen" time
                # is prior to this timestamp.
                lambda queryset, first_seen: queryset.exclude(
                    last_seen__lt=first_seen,
                ),
            ),
            'age_to': CallbackCondition(
                # This condition represents the upper threshold for "first
                # seen" time for an environment. Due to assertions #1, we
                # can exclude any values where the group first seen is
                # greater than that threshold.
                lambda queryset, first_seen: queryset.exclude(
                    first_seen__gt=first_seen,
                ),
            ),
            'last_seen_from': CallbackCondition(
                # This condition represents the lower threshold for "last
                # seen" time for an environment. Due to assertion #2, we
                # can exclude any values where the group last seen value is
                # less than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    last_seen__lt=last_seen,
                ),
            ),
            'last_seen_to': CallbackCondition(
                # This condition represents the upper threshold for "last
                # seen" time for an environment. Due to assertions #2 and
                # #3, we can exclude any values where the group first seen
                # value is greater than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    first_seen__gt=last_seen,
                ),
            ),
        }).build(
            # Join the groups against their GroupEnvironment row for the
            # requested environment.
            group_queryset.extra(
                where=[
                    '{} = {}'.format(
                        get_sql_column(Group, 'id'),
                        get_sql_column(GroupEnvironment, 'group_id'),
                    ),
                    '{} = %s'.format(
                        get_sql_column(GroupEnvironment, 'environment_id'),
                    ),
                ],
                params=[environment.id],
                tables=[GroupEnvironment._meta.db_table],
            ),
            parameters,
        )

        get_sort_expression, sort_value_to_cursor_value = environment_sort_strategies[sort_by]

        group_tag_value_queryset = tagstore.get_group_tag_value_qs(
            project.id,
            set(group_queryset.values_list('id', flat=True)),  # TODO: Limit?,
            environment.id,
            'environment',
            environment.name,
        )

        if retention_window_start is not None:
            group_tag_value_queryset = group_tag_value_queryset.filter(
                last_seen__gte=retention_window_start)

        # group id -> sort value for every group that is still a candidate.
        candidates = dict(
            QuerySetBuilder({
                'age_from': ScalarCondition('first_seen', 'gt'),
                'age_to': ScalarCondition('first_seen', 'lt'),
                'last_seen_from': ScalarCondition('last_seen', 'gt'),
                'last_seen_to': ScalarCondition('last_seen', 'lt'),
                'times_seen': CallbackCondition(
                    lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
                ),
                'times_seen_lower': ScalarCondition('times_seen', 'gt'),
                'times_seen_upper': ScalarCondition('times_seen', 'lt'),
            }).build(
                group_tag_value_queryset,
                parameters,
            ).extra(
                select={
                    'sort_value': get_sort_expression(group_tag_value_queryset.model),
                },
            ).values_list('group_id', 'sort_value')
        )

        if tags:
            # TODO: `get_group_ids_for_search_filter` should be able to
            # utilize the retention window start parameter for additional
            # optimizations.
            matches = tagstore.get_group_ids_for_search_filter(
                project.id,
                environment.id,
                tags,
                candidates.keys(),
                limit=len(candidates),
            )
            # Drop every candidate the tag search did not return.
            for key in set(candidates) - set(matches or []):
                del candidates[key]

        result = SequencePaginator(
            [(sort_value_to_cursor_value(score), group_id)
             for (group_id, score) in candidates.items()],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=count_hits)

        # The paginator yields ids; swap them for the Group instances and
        # skip ids that could not be loaded.
        groups = Group.objects.in_bulk(result.results)
        result.results = [groups[k] for k in result.results if k in groups]

        return result
    else:
        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('datetime', 'gt'),
            'date_to': ScalarCondition('datetime', 'lt'),
        })
        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            # Only the first 1000 distinct group ids are considered.
            group_queryset = group_queryset.filter(
                id__in=list(
                    event_queryset_builder.build(
                        Event.objects.filter(project_id=project.id),
                        parameters,
                    ).distinct().values_list('group_id', flat=True)[:1000],
                )
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.filter(
                    first_release__organization_id=project.organization_id,
                    first_release__version=version,
                ),
            ),
            'age_from': ScalarCondition('first_seen', 'gt'),
            'age_to': ScalarCondition('first_seen', 'lt'),
            'last_seen_from': ScalarCondition('last_seen', 'gt'),
            'last_seen_to': ScalarCondition('last_seen', 'lt'),
            'times_seen': CallbackCondition(
                lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', 'gt'),
            'times_seen_upper': ScalarCondition('times_seen', 'lt'),
        }).build(
            group_queryset,
            parameters,
        ).extra(
            select={
                'sort_value': get_sort_clause(sort_by),
            },
        )

        if tags:
            matches = tagstore.get_group_ids_for_search_filter(project.id, None, tags)
            if matches:
                group_queryset = group_queryset.filter(id__in=matches)
            else:
                group_queryset = group_queryset.none()

        paginator_cls, sort_clause = sort_strategies[sort_by]
        group_queryset = group_queryset.order_by(sort_clause)
        paginator = paginator_cls(group_queryset, sort_clause, **paginator_options)
        return paginator.get_result(limit, cursor, count_hits=count_hits)
def get(self, request, group):
    """
    List an Issue's Events
    ``````````````````````

    This endpoint lists an issue's events, newest first. An optional
    ``query`` GET parameter is parsed into a free-text term and tag
    lookups which narrow the result set.

    :pparam string issue_id: the ID of the issue to retrieve.
    :auth: required
    """

    def paginated(queryset):
        # Every exit path, including the empty ones, shares the same
        # datetime-ordered pagination and serialization.
        return self.paginate(
            request=request,
            queryset=queryset,
            order_by='-datetime',
            on_results=lambda results: serialize(results, request.user),
            paginator_cls=DateTimePaginator,
        )

    event_queryset = Event.objects.filter(group_id=group.id)

    try:
        environment = self._get_environment_from_request(
            request,
            group.project.organization_id,
        )
    except Environment.DoesNotExist:
        return paginated(event_queryset.none())

    # Split the raw search string into a free-text term and tag lookups.
    text_query = None
    tags = {}
    raw_query = request.GET.get('query')
    if raw_query:
        try:
            parsed = parse_query(group.project, raw_query, request.user)
        except InvalidQuery as exc:
            return Response({'detail': six.text_type(exc)}, status=400)
        text_query = parsed.pop('query', None)
        tags = parsed.pop('tags', {})

    if environment is not None:
        environment_name = environment.name
        if 'environment' in tags and tags['environment'] != environment_name:
            # An event can only be associated with a single
            # environment, so if the environment associated with
            # the request is different than the environment
            # provided as a tag lookup, the query cannot contain
            # any valid results.
            return paginated(event_queryset.none())
        tags['environment'] = environment_name

    if text_query:
        text_match = Q(message__icontains=text_query)
        # A 32 character term is also tried as an exact event id.
        if len(text_query) == 32:
            text_match = text_match | Q(event_id__exact=text_query)
        event_queryset = event_queryset.filter(text_match)

    # TODO currently snuba can be used to get this filter of event_ids matching
    # the search tags, which is then used to further filter a postgres QuerySet
    # Ideally we would just use snuba to completely replace the fetching of the
    # events.
    if tags:
        environment_id = None if environment is None else environment.id
        event_filter = tagstore.get_group_event_filter(
            group.project_id,
            group.id,
            environment_id,
            tags,
        )
        if not event_filter:
            return paginated(event_queryset.none())
        event_queryset = event_queryset.filter(**event_filter)

    # filter out events which are beyond the retention period
    retention = quotas.get_event_retention(organization=group.project.organization)
    if retention:
        oldest_allowed = timezone.now() - timedelta(days=retention)
        event_queryset = event_queryset.filter(datetime__gte=oldest_allowed)

    return paginated(event_queryset)
def query(
    self,
    projects,
    environments=None,
    sort_by="date",
    limit=100,
    cursor=None,
    count_hits=False,
    paginator_options=None,
    search_filters=None,
    date_from=None,
    date_to=None,
):
    """
    Build the base group queryset for a search and hand it to ``_query``.

    The queryset covers the given projects (which must share a single
    organization), excludes groups pending deletion or merge, applies the
    supported search filters and drops groups last seen before the
    organization's event retention window.

    Raises ``RuntimeError`` when the projects do not all share one
    organization id.
    """
    from sentry.models import Group, GroupStatus, GroupSubscription

    if search_filters is None:
        search_filters = []

    # ensure projects are from same org
    organization_ids = {p.organization_id for p in projects}
    if len(organization_ids) != 1:
        raise RuntimeError("Cross organization search not supported")

    if paginator_options is None:
        paginator_options = {}

    hidden_statuses = [
        GroupStatus.PENDING_DELETION,
        GroupStatus.DELETION_IN_PROGRESS,
        GroupStatus.PENDING_MERGE,
    ]
    group_queryset = Group.objects.filter(project__in=projects).exclude(
        status__in=hidden_statuses
    )

    def status_condition(status):
        return Q(status=status)

    def bookmarked_by_condition(user):
        return Q(bookmark_set__project__in=projects, bookmark_set__user=user)

    def subscribed_by_condition(user):
        subscribed_groups = GroupSubscription.objects.filter(
            project__in=projects, user=user, is_active=True
        ).values_list("group")
        return Q(id__in=subscribed_groups)

    qs_builder_conditions = {
        "status": QCallbackCondition(status_condition),
        "bookmarked_by": QCallbackCondition(bookmarked_by_condition),
        "assigned_to": QCallbackCondition(
            functools.partial(assigned_to_filter, projects=projects)
        ),
        "unassigned": QCallbackCondition(
            functools.partial(unassigned_filter, projects=projects)
        ),
        "subscribed_by": QCallbackCondition(subscribed_by_condition),
        "active_at": ScalarCondition("active_at"),
    }

    builder = QuerySetBuilder(qs_builder_conditions)
    group_queryset = builder.build(group_queryset, search_filters)

    # filter out groups which are beyond the retention period
    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    retention_window_start = None
    retention = quotas.get_event_retention(organization=projects[0].organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

    # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
    # seemed better to handle all the shared initialization and then handoff to the
    # actual backend.
    return self._query(
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        date_from,
        date_to,
    )
def get(self, request, group):
    """
    List an Issue's Events
    ``````````````````````

    This endpoint lists an issue's events, newest first. An optional
    ``query`` GET parameter is parsed into a free-text term and tag
    lookups which narrow the result set.

    :pparam string issue_id: the ID of the issue to retrieve.
    :auth: required
    """

    def paginated(queryset):
        # Single place that knows how an event queryset becomes a response.
        return self.paginate(
            request=request,
            queryset=queryset,
            order_by='-datetime',
            on_results=lambda results: serialize(results, request.user),
            paginator_cls=DateTimePaginator,
        )

    matching_events = Event.objects.filter(group_id=group.id)

    try:
        environment = self._get_environment_from_request(
            request,
            group.project.organization_id,
        )
    except Environment.DoesNotExist:
        return paginated(matching_events.none())

    # Defaults used when no search string was supplied.
    text_query = None
    tags = {}
    raw_query = request.GET.get('query')
    if raw_query:
        try:
            parsed = parse_query(group.project, raw_query, request.user)
        except InvalidQuery as exc:
            return Response({'detail': six.text_type(exc)}, status=400)
        text_query = parsed.pop('query', None)
        tags = parsed.pop('tags', {})

    if environment is not None:
        environment_name = environment.name
        if 'environment' in tags and tags['environment'] != environment_name:
            # An event can only be associated with a single
            # environment, so if the environment associated with
            # the request is different than the environment
            # provided as a tag lookup, the query cannot contain
            # any valid results.
            return paginated(matching_events.none())
        tags['environment'] = environment_name

    if text_query:
        text_match = Q(message__icontains=text_query)
        # A 32 character term is also tried as an exact event id.
        if len(text_query) == 32:
            text_match = text_match | Q(event_id__exact=text_query)
        matching_events = matching_events.filter(text_match)

    if tags:
        environment_id = None if environment is None else environment.id
        event_ids = tagstore.get_group_event_ids(
            group.project_id,
            group.id,
            environment_id,
            tags,
        )
        if not event_ids:
            return paginated(matching_events.none())
        matching_events = matching_events.filter(id__in=event_ids)

    # filter out events which are beyond the retention period
    retention = quotas.get_event_retention(organization=group.project.organization)
    if retention:
        oldest_allowed = timezone.now() - timedelta(days=retention)
        matching_events = matching_events.filter(datetime__gte=oldest_allowed)

    return paginated(matching_events)
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, referrer=None,
              is_grouprelease=False, **kwargs):
    """
    Sends a query to snuba.

    `start` and `end`: The beginning and end of the query time window (required)

    `groupby`: A list of column names to group by.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar). The list passed in is copied, so
    the conditions derived from `filter_keys` are never written back into the
    caller's list.

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query.

    `rollup` is sent to snuba as `granularity`; `referrer`, when given, is sent
    as the `referer` request header.

    The rest of the args are passed directly into the query JSON unmodified.
    See the snuba schema for details.

    Returns the decoded JSON response body, with every row of `body['data']`
    passed through the reverse translator.

    Raises `UnqualifiedQueryError` when no project id is given or can be
    inferred, `QueryOutsideRetentionError` when the retention-clamped start is
    after `end`, `QueryOutsideGroupActivityError` when `start` is still after
    `end` once the window has been shrunk, `UnexpectedResponseError` when the
    response is not JSON, and `SnubaError` (or one of the more specific error
    classes selected below) for transport failures and non-200 responses.
    """

    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(start)
    end = naiveify_datetime(end)

    groupby = groupby or []
    # Work on a private copy: conditions derived from filter_keys are appended
    # below and must not accumulate in a list the caller may reuse.
    conditions = list(conditions) if conditions else []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = list(set(filter_keys['project_id']))
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(deepcopy(filter_keys))):
        if keys:
            if len(keys) == 1 and None in keys:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise UnqualifiedQueryError(
            "No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    new_start = shrink_time_window(filter_keys.get('issue'), start)

    # TODO (alexh) this is a quick emergency fix for an occasion where a search
    # results in only 1 django candidate, which is then passed to snuba to
    # check and we raised because of it. Remove this once we figure out why the
    # candidate was returned from django at all if it existed only outside the
    # time range of the query
    if new_start <= end:
        start = new_start

    if start > end:
        raise QueryOutsideGroupActivityError

    kwargs.update({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'groupby': groupby,
        'conditions': conditions,
        'aggregations': aggregations,
        'project': project_ids,
        'granularity': rollup,  # TODO name these things the same
    })
    # Unset options are dropped from the payload rather than sent as nulls.
    kwargs = {k: v for k, v in six.iteritems(kwargs) if v is not None}

    kwargs.update(OVERRIDE_OPTIONS)

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(kwargs), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise UnexpectedResponseError(
            u"Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            error = body['error']
            if response.status == 429:
                raise RateLimitExceeded(error['message'])
            elif error['type'] == 'schema':
                raise SchemaValidationError(error['message'])
            elif error['type'] == 'clickhouse':
                # Unmapped clickhouse error codes fall back to QueryExecutionError.
                raise clickhouse_error_codes_map.get(
                    error['code'],
                    QueryExecutionError,
                )(error['message'])
            else:
                raise SnubaError(error['message'])
        else:
            raise SnubaError(u'HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
def query(self, projects, tags=None, environments=None, sort_by='date', limit=100,
          cursor=None, count_hits=False, paginator_options=None, **parameters):
    """
    Build the base group queryset for a search and hand it to ``_query``.

    The projects must share a single organization, otherwise
    ``RuntimeError`` is raised. A ``'latest'`` value for the
    ``sentry:release`` tag or the ``first_release`` parameter is replaced by
    the result of ``get_latest_release``; when that lookup raises
    ``Release.DoesNotExist`` an empty paginator result is returned.
    """
    from sentry.models import Group, GroupAssignee, GroupStatus, GroupSubscription, Release

    # ensure projects are from same org
    organization_ids = {p.organization_id for p in projects}
    if len(organization_ids) != 1:
        raise RuntimeError('Cross organization search not supported')

    paginator_options = {} if paginator_options is None else paginator_options
    tags = {} if tags is None else tags

    try:
        if tags.get('sentry:release') == 'latest':
            tags['sentry:release'] = get_latest_release(projects, environments)

        if parameters.get('first_release') == 'latest':
            parameters['first_release'] = get_latest_release(projects, environments)
    except Release.DoesNotExist:
        # no matches could possibly be found from this point on
        return Paginator(Group.objects.none()).get_result()

    def filter_by_text(queryset, query):
        # An empty search term leaves the queryset untouched.
        if not query:
            return queryset
        return queryset.filter(
            Q(message__icontains=query) | Q(culprit__icontains=query),
        )

    def filter_by_status(queryset, status):
        return queryset.filter(status=status)

    def filter_by_bookmark(queryset, user):
        return queryset.filter(
            bookmark_set__project__in=projects,
            bookmark_set__user=user,
        )

    def filter_by_unassigned(queryset, unassigned):
        # Groups with an assignee are excluded when looking for unassigned
        # groups, and selected otherwise.
        apply_filter = queryset.exclude if unassigned else queryset.filter
        assigned_group_ids = GroupAssignee.objects.filter(
            project_id__in=[p.id for p in projects],
        ).values_list('group_id', flat=True)
        return apply_filter(id__in=assigned_group_ids)

    def filter_by_subscription(queryset, user):
        subscribed_groups = GroupSubscription.objects.filter(
            project__in=projects,
            user=user,
            is_active=True,
        ).values_list('group')
        return queryset.filter(id__in=subscribed_groups)

    builder = QuerySetBuilder({
        'query': CallbackCondition(filter_by_text),
        'status': CallbackCondition(filter_by_status),
        'bookmarked_by': CallbackCondition(filter_by_bookmark),
        'assigned_to': CallbackCondition(
            functools.partial(assigned_to_filter, projects=projects),
        ),
        'unassigned': CallbackCondition(filter_by_unassigned),
        'subscribed_by': CallbackCondition(filter_by_subscription),
        'active_at_from': ScalarCondition('active_at', 'gt'),
        'active_at_to': ScalarCondition('active_at', 'lt'),
    })

    visible_groups = Group.objects.filter(project__in=projects).exclude(status__in=[
        GroupStatus.PENDING_DELETION,
        GroupStatus.DELETION_IN_PROGRESS,
        GroupStatus.PENDING_MERGE,
    ])
    group_queryset = builder.build(visible_groups, parameters)

    # filter out groups which are beyond the retention period
    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    retention_window_start = None
    retention = quotas.get_event_retention(organization=projects[0].organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

    # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
    # seemed better to handle all the shared initialization and then handoff to the
    # actual backend.
    return self._query(
        projects,
        retention_window_start,
        group_queryset,
        tags,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        **parameters
    )
def _prepare_query_params(query_params):
    """
    Turn a query-params object into the keyword payload for a snuba query.

    Returns a ``(kwargs, forward, reverse)`` tuple: the payload dictionary
    plus the two translators obtained from ``get_snuba_translators``.

    Conditions derived from ``query_params.filter_keys`` are appended to
    ``query_params.conditions`` and ``query_params.kwargs`` is updated in
    place.

    Raises ``UnqualifiedQueryError`` when no project id is given or can be
    inferred, ``QueryOutsideRetentionError`` when the retention-clamped start
    is after the end, and ``QueryOutsideGroupActivityError`` when the start is
    still after the end once the window has been shrunk.
    """
    filter_keys = query_params.filter_keys

    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    window_start = naiveify_datetime(query_params.start)
    window_end = naiveify_datetime(query_params.end)

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(
            filter_keys,
            is_grouprelease=query_params.is_grouprelease,
        )

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = list(set(filter_keys['project_id']))
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            related_ids = [
                get_related_project_ids(column, filter_keys[column])
                for column in filter_keys
            ]
            project_ids = list(set.union(*map(set, related_ids)))
    else:
        project_ids = []

    translated_keys = forward(deepcopy(filter_keys))
    for col, keys in six.iteritems(translated_keys):
        if not keys:
            continue
        # A lone None key becomes a NULL check instead of an IN list.
        if len(keys) == 1 and None in keys:
            condition = (col, 'IS NULL', None)
        else:
            condition = (col, 'IN', keys)
        query_params.conditions.append(condition)

    if not project_ids:
        raise UnqualifiedQueryError(
            "No project_id filter, or none could be inferred from other filters."
        )

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    organization = Organization(project.organization_id)
    retention = quotas.get_event_retention(organization=organization)
    if retention:
        retention_start = datetime.utcnow() - timedelta(days=retention)
        window_start = max(window_start, retention_start)
        if window_start > window_end:
            raise QueryOutsideRetentionError

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    shrunk_start = shrink_time_window(filter_keys.get('issue'), window_start)

    # TODO (alexh) this is a quick emergency fix for an occasion where a search
    # results in only 1 django candidate, which is then passed to snuba to
    # check and we raised because of it. Remove this once we figure out why the
    # candidate was returned from django at all if it existed only outside the
    # time range of the query
    if shrunk_start <= window_end:
        window_start = shrunk_start

    if window_start > window_end:
        raise QueryOutsideGroupActivityError

    query_params.kwargs.update({
        'from_date': window_start.isoformat(),
        'to_date': window_end.isoformat(),
        'groupby': query_params.groupby,
        'conditions': query_params.conditions,
        'aggregations': query_params.aggregations,
        'project': project_ids,
        'granularity': query_params.rollup,  # TODO name these things the same
    })

    # Unset options are dropped from the payload rather than sent as nulls.
    payload = {}
    for name, value in six.iteritems(query_params.kwargs):
        if value is not None:
            payload[name] = value
    payload.update(OVERRIDE_OPTIONS)

    return payload, forward, reverse
def get_project_config(project, org_options=None, full_config=True, project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param org_options: Preloaded organization options. When ``None`` they are
        fetched with ``OrganizationOption.objects.get_all_values``.
    :param full_config: When True (the default) the full configuration is
        built. When False the function returns early with only the public part
        of the configuration, which is all external relays need.
    :param project_keys: Pre-fetched project keys. When none are provided the
        resulting config carries an empty ``publicKeys`` list.
    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    public_keys = []
    for project_key in project_keys or ():
        key_info = {
            "publicKey": project_key.public_key,
            "isEnabled": project_key.status == 0,
        }
        if full_config:
            # The numeric id and quotas are only part of the full config.
            key_info["numericId"] = project_key.id
            key_info["quotas"] = [
                quota.to_json()
                for quota in quotas.get_quotas(project, key=project_key)
            ]
        public_keys.append(key_info)

    now = datetime.utcnow().replace(tzinfo=utc)

    if org_options is None:
        org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    with Hub.current.start_span(op="get_public_config"):
        config = {
            "disabled": project.status > 0,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange", now),
            "rev": project.get_option("sentry:relay-rev", uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains": list(get_origins(project)),
                "trustedRelays": org_options.get("sentry:trusted-relays", []),
                "piiConfig": _get_pii_config(project),
                "datascrubbingSettings": _get_datascrubbing_settings(project, org_options),
            },
            "projectId": project.id,  # XXX: Unused by Relay, required by Python store
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **config)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    config["organizationId"] = project.organization_id

    inner_config = config["config"]

    with Hub.current.start_span(op="get_filter_settings"):
        # get the filter settings for this project
        filter_settings = {}
        inner_config["filterSettings"] = filter_settings

        for inbound_filter in get_all_filters():
            filter_key = get_filter_key(inbound_filter)
            loaded_settings = _load_filter_settings(inbound_filter, project)
            filter_settings[filter_key] = loaded_settings

        # (settings key, payload key, project option) for the filters which
        # are only emitted when the project option holds a truthy value.
        option_backed_filters = (
            ("releases", "releases", u"sentry:{}".format(FilterTypes.RELEASES)),
            ("clientIps", "blacklistedIps", "sentry:blacklisted_ips"),
            ("errorMessages", "patterns", u"sentry:{}".format(FilterTypes.ERROR_MESSAGES)),
        )
        for settings_key, payload_key, option_name in option_backed_filters:
            option_value = project.get_option(option_name)
            if option_value:
                filter_settings[settings_key] = {payload_key: option_value}

        csp_disallowed_sources = []
        if bool(project.get_option("sentry:csp_ignored_sources_defaults", True)):
            csp_disallowed_sources.extend(DEFAULT_DISALLOWED_SOURCES)
        csp_disallowed_sources.extend(project.get_option("sentry:csp_ignored_sources", []))
        if csp_disallowed_sources:
            filter_settings["csp"] = {"disallowedSources": csp_disallowed_sources}

    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        inner_config["groupingConfig"] = get_grouping_config_dict_for_project(project)

    with Hub.current.start_span(op="get_event_retention"):
        inner_config["eventRetention"] = quotas.get_event_retention(project.organization)

    return ProjectConfig(project, **config)
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, arrayjoin=None, limit=None, orderby=None,
              having=None, referrer=None, is_grouprelease=False, selected_columns=None):
    """
    Sends a query to snuba.

    `start` and `end`: The beginning and end of the query time window; `start`
    is moved forward to the start of the organization's retention window when
    it lies before it.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar). The list passed in is copied, so
    the conditions derived from `filter_keys` are never written back into the
    caller's list.

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query.

    `rollup` is sent to snuba as `granularity`; `referrer`, when given, is sent
    as the `referer` request header. The remaining arguments are placed in the
    request under their own names and omitted when `None`.

    Returns the decoded JSON response body, with every row of `body['data']`
    passed through the reverse translator.

    Raises `SnubaError` when no project id is given or can be inferred, on
    transport failures, on undecodable responses and on non-200 responses, and
    `EntireQueryOutsideRetentionError` when the retention-clamped start is
    after `end`.
    """
    groupby = groupby or []
    # Work on a private copy: conditions derived from filter_keys are appended
    # below and must not accumulate in a list the caller may reuse.
    conditions = list(conditions) if conditions else []
    having = having or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}
    selected_columns = selected_columns or []

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = filter_keys['project_id']
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(filter_keys.copy())):
        if keys:
            if len(keys) == 1 and keys[0] is None:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise SnubaError("No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise EntireQueryOutsideRetentionError

    # If the grouping, aggregation, or any of the conditions reference `issue`
    # we need to fetch the issue definitions (issue -> fingerprint hashes)
    aggregate_cols = [a[1] for a in aggregations]
    condition_cols = [c[0] for c in flat_conditions(conditions)]
    all_cols = groupby + aggregate_cols + condition_cols + selected_columns
    get_issues = 'issue' in all_cols

    with timer('get_project_issues'):
        issues = get_project_issues(project_ids, filter_keys.get('issue')) if get_issues else None

    # Unset options are dropped from the payload rather than sent as nulls.
    request = {k: v for k, v in six.iteritems({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'conditions': conditions,
        'having': having,
        'groupby': groupby,
        'project': project_ids,
        'aggregations': aggregations,
        'granularity': rollup,
        'issues': issues,
        'arrayjoin': arrayjoin,
        'limit': limit,
        'orderby': orderby,
        'selected_columns': selected_columns,
    }) if v is not None}

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise SnubaError("Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            raise SnubaError(body['error'])
        else:
            raise SnubaError('HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
def query(self, projects, tags=None, environments=None, sort_by='date', limit=100,
          cursor=None, count_hits=False, paginator_options=None,
          search_filters=None, **parameters):
    """
    Build the base group queryset for a search and hand it to ``_query``.

    The queryset covers the given projects (which must share a single
    organization), excludes groups pending deletion or merge, applies the
    supported search filters, including the ``message`` filter, and drops
    groups last seen before the organization's event retention window.

    Raises ``RuntimeError`` when the projects do not all share one
    organization id.
    """
    from sentry.models import Group, GroupStatus, GroupSubscription

    if search_filters is None:
        search_filters = []

    # ensure projects are from same org
    organization_ids = {p.organization_id for p in projects}
    if len(organization_ids) != 1:
        raise RuntimeError('Cross organization search not supported')

    if paginator_options is None:
        paginator_options = {}

    # ``tags`` is normalised here but is not passed on to ``_query`` below.
    if tags is None:
        tags = {}

    hidden_statuses = [
        GroupStatus.PENDING_DELETION,
        GroupStatus.DELETION_IN_PROGRESS,
        GroupStatus.PENDING_MERGE,
    ]
    group_queryset = Group.objects.filter(project__in=projects).exclude(
        status__in=hidden_statuses,
    )

    def status_condition(status):
        return Q(status=status)

    def bookmarked_by_condition(user):
        return Q(
            bookmark_set__project__in=projects,
            bookmark_set__user=user,
        )

    def subscribed_by_condition(user):
        subscribed_groups = GroupSubscription.objects.filter(
            project__in=projects,
            user=user,
            is_active=True,
        ).values_list('group')
        return Q(id__in=subscribed_groups)

    def message_condition(message):
        return Q(
            Q(message__icontains=message) | Q(culprit__icontains=message),
        )

    qs_builder_conditions = {
        'status': QCallbackCondition(status_condition),
        'bookmarked_by': QCallbackCondition(bookmarked_by_condition),
        'assigned_to': QCallbackCondition(
            functools.partial(assigned_to_filter, projects=projects),
        ),
        'unassigned': QCallbackCondition(
            functools.partial(unassigned_filter, projects=projects),
        ),
        'subscribed_by': QCallbackCondition(subscribed_by_condition),
        'active_at': ScalarCondition('active_at'),
    }

    # Only the first ``message`` filter is considered, and only when it
    # carries a non-empty raw value.
    message_filters = [
        candidate for candidate in search_filters
        if candidate.key.name == 'message'
    ]
    if message_filters and message_filters[0].value.raw_value:
        message_filter = message_filters[0]
        # We only support full wildcard matching in postgres
        if is_postgres() and message_filter.value.is_wildcard():
            group_queryset = message_regex_filter(group_queryset, message_filter)
        else:
            # Otherwise, use the standard LIKE query
            qs_builder_conditions['message'] = QCallbackCondition(message_condition)

    builder = QuerySetBuilder(qs_builder_conditions)
    group_queryset = builder.build(group_queryset, search_filters)

    # filter out groups which are beyond the retention period
    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    retention_window_start = None
    retention = quotas.get_event_retention(organization=projects[0].organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

    # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
    # seemed better to handle all the shared initialization and then handoff to the
    # actual backend.
    return self._query(
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        **parameters
    )
def query(
    self,
    projects,
    environments=None,
    sort_by="date",
    limit=100,
    cursor=None,
    count_hits=False,
    paginator_options=None,
    search_filters=None,
    date_from=None,
    date_to=None,
    max_hits=None,
):
    """
    Run a group search through the query executor chosen for it.

    The group queryset comes from ``_build_group_queryset`` and the executor
    from ``_get_query_executor``; the executor's ``query`` result is
    returned unchanged.

    Raises ``RuntimeError`` when the projects do not all share one
    organization id, and ``InvalidSearchQuery`` when the executor does not
    support ``sort_by``.
    """
    if search_filters is None:
        search_filters = []

    # ensure projects are from same org
    organization_ids = {p.organization_id for p in projects}
    if len(organization_ids) != 1:
        raise RuntimeError("Cross organization search not supported")

    if paginator_options is None:
        paginator_options = {}

    # filter out groups which are beyond the retention period
    retention = quotas.get_event_retention(organization=projects[0].organization)
    retention_window_start = (
        timezone.now() - timedelta(days=retention) if retention else None
    )

    # Keyword arguments shared by all three collaborators below.
    search_scope = {
        "projects": projects,
        "environments": environments,
        "search_filters": search_filters,
        "date_from": date_from,
        "date_to": date_to,
    }

    group_queryset = self._build_group_queryset(
        retention_window_start=retention_window_start, **search_scope
    )

    query_executor = self._get_query_executor(
        group_queryset=group_queryset, **search_scope
    )

    # ensure sort strategy is supported by executor
    if not query_executor.has_sort_strategy(sort_by):
        raise InvalidSearchQuery("Sort key '{}' not supported.".format(sort_by))

    return query_executor.query(
        retention_window_start=retention_window_start,
        group_queryset=group_queryset,
        sort_by=sort_by,
        limit=limit,
        cursor=cursor,
        count_hits=count_hits,
        paginator_options=paginator_options,
        max_hits=max_hits,
        **search_scope
    )
def query(
    self,
    projects,
    environments=None,
    sort_by='date',
    limit=100,
    cursor=None,
    count_hits=False,
    paginator_options=None,
    search_filters=None,
    date_from=None,
    date_to=None,
):
    """
    Build the base group queryset for a search and hand it to ``_query``.

    The queryset covers the given projects (which must share a single
    organization), excludes groups pending deletion or merge, applies the
    supported search filters, including the ``message`` filter, and drops
    groups last seen before the organization's event retention window.

    Raises ``RuntimeError`` when the projects do not all share one
    organization id.
    """
    from sentry.models import Group, GroupStatus, GroupSubscription

    if search_filters is None:
        search_filters = []

    # ensure projects are from same org
    organization_ids = {p.organization_id for p in projects}
    if len(organization_ids) != 1:
        raise RuntimeError('Cross organization search not supported')

    if paginator_options is None:
        paginator_options = {}

    hidden_statuses = [
        GroupStatus.PENDING_DELETION,
        GroupStatus.DELETION_IN_PROGRESS,
        GroupStatus.PENDING_MERGE,
    ]
    group_queryset = Group.objects.filter(project__in=projects).exclude(
        status__in=hidden_statuses,
    )

    def status_condition(status):
        return Q(status=status)

    def bookmarked_by_condition(user):
        return Q(
            bookmark_set__project__in=projects,
            bookmark_set__user=user,
        )

    def subscribed_by_condition(user):
        subscribed_groups = GroupSubscription.objects.filter(
            project__in=projects,
            user=user,
            is_active=True,
        ).values_list('group')
        return Q(id__in=subscribed_groups)

    def message_condition(message):
        return Q(
            Q(message__icontains=message) | Q(culprit__icontains=message),
        )

    qs_builder_conditions = {
        'status': QCallbackCondition(status_condition),
        'bookmarked_by': QCallbackCondition(bookmarked_by_condition),
        'assigned_to': QCallbackCondition(
            functools.partial(assigned_to_filter, projects=projects),
        ),
        'unassigned': QCallbackCondition(
            functools.partial(unassigned_filter, projects=projects),
        ),
        'subscribed_by': QCallbackCondition(subscribed_by_condition),
        'active_at': ScalarCondition('active_at'),
    }

    # Only the first ``message`` filter is considered, and only when it
    # carries a non-empty raw value.
    message_filters = [
        candidate for candidate in search_filters
        if candidate.key.name == 'message'
    ]
    if message_filters and message_filters[0].value.raw_value:
        message_filter = message_filters[0]
        # We only support full wildcard matching in postgres
        if is_postgres() and message_filter.value.is_wildcard():
            group_queryset = message_regex_filter(group_queryset, message_filter)
        else:
            # Otherwise, use the standard LIKE query
            qs_builder_conditions['message'] = QCallbackCondition(message_condition)

    builder = QuerySetBuilder(qs_builder_conditions)
    group_queryset = builder.build(group_queryset, search_filters)

    # filter out groups which are beyond the retention period
    # TODO: This could be optimized when building querysets to identify
    # criteria that are logically impossible (e.g. if the upper bound
    # for last seen is before the retention window starts, no results
    # exist.)
    retention_window_start = None
    retention = quotas.get_event_retention(organization=projects[0].organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

    # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
    # seemed better to handle all the shared initialization and then handoff to the
    # actual backend.
    return self._query(
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        date_from,
        date_to,
    )