def test_simple(self):
    group = self.create_group(project=self.project)
    event = self.create_event(group=group)

    with self.tasks():
        index_event_tags.delay(
            event_id=event.id,
            group_id=group.id,
            project_id=self.project.id,
            environment_id=self.environment.id,
            organization_id=self.project.organization_id,
            tags=[('foo', 'bar'), ('biz', 'baz')],
        )

    tags = list(
        tagstore.get_event_tag_qs(
            event_id=event.id,
        ).values_list('key_id', 'value_id')
    )
    assert len(tags) == 2

    tagkey = tagstore.get_tag_key(
        project_id=self.project.id,
        environment_id=None,
        key='foo',
    )
    tagvalue = tagstore.get_tag_value(
        project_id=self.project.id,
        environment_id=None,
        key='foo',
        value='bar',
    )
    assert (tagkey.id, tagvalue.id) in tags

    tagkey = tagstore.get_tag_key(
        project_id=self.project.id,
        environment_id=None,
        key='biz',
    )
    tagvalue = tagstore.get_tag_value(
        project_id=self.project.id,
        environment_id=None,
        key='biz',
        value='baz',
    )
    assert (tagkey.id, tagvalue.id) in tags

    # ensure it safely handles repeat runs
    with self.tasks():
        index_event_tags.delay(
            event_id=event.id,
            group_id=group.id,
            project_id=self.project.id,
            environment_id=self.environment.id,
            organization_id=self.project.organization_id,
            tags=[('foo', 'bar'), ('biz', 'baz')],
        )

    queryset = tagstore.get_event_tag_qs(event_id=event.id)
    assert queryset.count() == 2
def test_simple(self):
    group = self.create_group(project=self.project)
    event = self.create_event(group=group)

    with self.tasks():
        index_event_tags.delay(
            event_id=event.id,
            group_id=group.id,
            project_id=self.project.id,
            environment_id=self.environment.id,
            organization_id=self.project.organization_id,
            tags=[('foo', 'bar'), ('biz', 'baz')],
        )

    tags = list(
        tagstore.get_event_tag_qs(
            event_id=event.id,
        ).values_list('key_id', 'value_id')
    )
    assert len(tags) == 2

    tagkey = tagstore.get_tag_key(
        key='foo',
        project_id=self.project.id,
    )
    tagvalue = tagstore.get_tag_value(
        key='foo',
        value='bar',
        project_id=self.project.id,
    )
    assert (tagkey.id, tagvalue.id) in tags

    tagkey = tagstore.get_tag_key(
        key='biz',
        project_id=self.project.id,
    )
    tagvalue = tagstore.get_tag_value(
        key='biz',
        value='baz',
        project_id=self.project.id,
    )
    assert (tagkey.id, tagvalue.id) in tags

    # ensure it safely handles repeat runs
    with self.tasks():
        index_event_tags.delay(
            event_id=event.id,
            group_id=group.id,
            project_id=self.project.id,
            environment_id=self.environment.id,
            organization_id=self.project.organization_id,
            tags=[('foo', 'bar'), ('biz', 'baz')],
        )

    queryset = tagstore.get_event_tag_qs(event_id=event.id)
    assert queryset.count() == 2
def test_simple(self):
    project = self.create_project()
    group = self.create_group(project=project)
    event = self.create_event(group=group)

    EventMapping.objects.create(
        project_id=project.id,
        event_id='a' * 32,
        group_id=group.id,
    )
    tagstore.create_event_tags(
        event_id=event.id,
        group_id=group.id,
        project_id=project.id,
        tags=[(1, 1)],
    )
    GroupAssignee.objects.create(
        group=group,
        project=project,
        user=self.user,
    )
    GroupHash.objects.create(
        project=project,
        group=group,
        hash=uuid4().hex,
    )
    GroupMeta.objects.create(
        group=group,
        key='foo',
        value='bar',
    )
    GroupRedirect.objects.create(
        group_id=group.id,
        previous_group_id=1,
    )

    deletion = ScheduledDeletion.schedule(group, days=0)
    deletion.update(in_progress=True)

    with self.tasks():
        run_deletion(deletion.id)

    assert not Event.objects.filter(id=event.id).exists()
    assert not EventMapping.objects.filter(
        event_id='a' * 32,
        group_id=group.id,
    ).exists()
    assert not tagstore.get_event_tag_qs(event_id=event.id).exists()
    assert not GroupRedirect.objects.filter(group_id=group.id).exists()
    assert not GroupHash.objects.filter(group_id=group.id).exists()
    assert not Group.objects.filter(id=group.id).exists()
def test_simple(self):
    project = self.create_project()
    group = self.create_group(project=project)
    event = self.create_event(group=group)

    EventMapping.objects.create(
        project_id=project.id,
        event_id='a' * 32,
        group_id=group.id,
    )
    tagstore.create_event_tags(
        event_id=event.id,
        group_id=group.id,
        project_id=project.id,
        environment_id=self.environment.id,
        tags=[(1, 1)],
    )
    GroupAssignee.objects.create(
        group=group,
        project=project,
        user=self.user,
    )
    GroupHash.objects.create(
        project=project,
        group=group,
        hash=uuid4().hex,
    )
    GroupMeta.objects.create(
        group=group,
        key='foo',
        value='bar',
    )
    GroupRedirect.objects.create(
        group_id=group.id,
        previous_group_id=1,
    )

    deletion = ScheduledDeletion.schedule(group, days=0)
    deletion.update(in_progress=True)

    with self.tasks():
        run_deletion(deletion.id)

    assert not Event.objects.filter(id=event.id).exists()
    assert not EventMapping.objects.filter(
        event_id='a' * 32,
        group_id=group.id,
    ).exists()
    assert not tagstore.get_event_tag_qs(event_id=event.id).exists()
    assert not GroupRedirect.objects.filter(group_id=group.id).exists()
    assert not GroupHash.objects.filter(group_id=group.id).exists()
    assert not Group.objects.filter(id=group.id).exists()
def test_simple(self):
    project = self.create_project()
    group = self.create_group(
        project=project,
        status=GroupStatus.PENDING_DELETION,
    )
    event = self.create_event(group=group)

    EventMapping.objects.create(
        project_id=project.id,
        event_id='a' * 32,
        group_id=group.id,
    )
    tagstore.create_event_tags(
        event_id=event.id,
        group_id=group.id,
        project_id=project.id,
        environment_id=self.environment.id,
        tags=[(1, 1)],
    )
    GroupAssignee.objects.create(
        group=group,
        project=project,
        user=self.user,
    )
    GroupHash.objects.create(
        project=project,
        group=group,
        hash=uuid4().hex,
    )
    GroupMeta.objects.create(
        group=group,
        key='foo',
        value='bar',
    )
    GroupRedirect.objects.create(
        group_id=group.id,
        previous_group_id=1,
    )

    with self.tasks():
        delete_group(object_id=group.id)

    assert not Event.objects.filter(id=event.id).exists()
    assert not EventMapping.objects.filter(
        event_id='a' * 32,
        group_id=group.id,
    ).exists()
    assert not tagstore.get_event_tag_qs(event_id=event.id).exists()
    assert not GroupRedirect.objects.filter(group_id=group.id).exists()
    assert not GroupHash.objects.filter(group_id=group.id).exists()
    assert not Group.objects.filter(id=group.id).exists()
def test_simple(self):
    project = self.create_project()
    group = self.create_group(
        project=project,
        status=GroupStatus.PENDING_DELETION,
    )
    event = self.create_event(group=group)

    EventMapping.objects.create(
        project_id=project.id,
        event_id='a' * 32,
        group_id=group.id,
    )
    tagstore.create_event_tags(
        event_id=event.id,
        group_id=group.id,
        project_id=project.id,
        tags=[(1, 1)],
    )
    GroupAssignee.objects.create(
        group=group,
        project=project,
        user=self.user,
    )
    GroupHash.objects.create(
        project=project,
        group=group,
        hash=uuid4().hex,
    )
    GroupMeta.objects.create(
        group=group,
        key='foo',
        value='bar',
    )
    GroupRedirect.objects.create(
        group_id=group.id,
        previous_group_id=1,
    )

    with self.tasks():
        delete_group(object_id=group.id)

    assert not Event.objects.filter(id=event.id).exists()
    assert not EventMapping.objects.filter(
        event_id='a' * 32,
        group_id=group.id,
    ).exists()
    assert not tagstore.get_event_tag_qs(event_id=event.id).exists()
    assert not GroupRedirect.objects.filter(group_id=group.id).exists()
    assert not GroupHash.objects.filter(group_id=group.id).exists()
    assert not Group.objects.filter(id=group.id).exists()
def migrate_events(caches, project, source_id, destination_id, fingerprints, events, actor_id):
    # XXX: This is only actually able to create a destination group and migrate
    # the group hashes if there are events that can be migrated. How do we
    # handle this if there aren't any events? We can't create a group (there
    # isn't any data to derive the aggregates from), so we'd have to mark the
    # hash as in limbo somehow...?)
    if not events:
        return destination_id

    if destination_id is None:
        # XXX: There is a race condition here between the (wall clock) time
        # that the migration is started by the user and when we actually
        # get to this block where the new destination is created and we've
        # moved the ``GroupHash`` so that events start being associated
        # with it. During this gap, there could have been additional events
        # ingested, and if we want to handle this, we'd need to record the
        # highest event ID we've seen at the beginning of the migration,
        # then scan all events greater than that ID and migrate the ones
        # where necessary. (This still isn't even guaranteed to catch all
        # of the events due to processing latency, but it's a better shot.)

        # Create a new destination group.
        destination = Group.objects.create(
            project_id=project.id,
            short_id=project.next_short_id(),
            **get_group_creation_attributes(caches, events)
        )
        destination_id = destination.id

        # Move the group hashes to the destination.
        GroupHash.objects.filter(
            project_id=project.id,
            hash__in=fingerprints,
        ).update(group=destination_id)

        # Create activity records for the source and destination group.
        Activity.objects.create(
            project_id=project.id,
            group_id=destination_id,
            type=Activity.UNMERGE_DESTINATION,
            user_id=actor_id,
            data={
                'fingerprints': fingerprints,
                'source_id': source_id,
            },
        )
        Activity.objects.create(
            project_id=project.id,
            group_id=source_id,
            type=Activity.UNMERGE_SOURCE,
            user_id=actor_id,
            data={
                'fingerprints': fingerprints,
                'destination_id': destination_id,
            },
        )
    else:
        # Update the existing destination group.
        destination = Group.objects.get(id=destination_id)
        destination.update(**get_group_backfill_attributes(caches, destination, events))

    event_id_set = set(event.id for event in events)

    Event.objects.filter(
        project_id=project.id,
        id__in=event_id_set,
    ).update(group_id=destination_id)

    for event in events:
        event.group = destination

    tagstore.get_event_tag_qs(
        project_id=project.id,
        event_id__in=event_id_set,
    ).update(group_id=destination_id)

    event_event_id_set = set(event.event_id for event in events)

    EventMapping.objects.filter(
        project_id=project.id,
        event_id__in=event_event_id_set,
    ).update(group_id=destination_id)

    UserReport.objects.filter(
        project_id=project.id,
        event_id__in=event_event_id_set,
    ).update(group=destination_id)

    return destination.id
def _query(self, project, retention_window_start, group_queryset, tags, environment,
           sort_by, limit, cursor, count_hits, paginator_options, **parameters):
    from sentry.models import (Group, Environment, Event, GroupEnvironment, Release)

    if environment is not None:
        if 'environment' in tags:
            environment_name = tags.pop('environment')
            assert environment_name is ANY or Environment.objects.get(
                projects=project,
                name=environment_name,
            ).id == environment.id

        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('date_added', 'gt'),
            'date_to': ScalarCondition('date_added', 'lt'),
        })

        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            event_queryset = event_queryset_builder.build(
                tagstore.get_event_tag_qs(
                    project_id=project.id,
                    environment_id=environment.id,
                    key='environment',
                    value=environment.name,
                ),
                parameters,
            )
            if retention_window_start is not None:
                event_queryset = event_queryset.filter(date_added__gte=retention_window_start)

            group_queryset = group_queryset.filter(
                id__in=list(event_queryset.distinct().values_list('group_id', flat=True)[:1000])
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.extra(
                    where=[
                        '{} = {}'.format(
                            get_sql_column(GroupEnvironment, 'first_release_id'),
                            get_sql_column(Release, 'id'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'organization'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'version'),
                        ),
                    ],
                    params=[project.organization_id, version],
                    tables=[Release._meta.db_table],
                ),
            ),
            'times_seen': CallbackCondition(
                # This condition represents the exact number of times that
                # an issue has been seen in an environment. Since an issue
                # can't be seen in an environment more times than the issue
                # was seen overall, we can safely exclude any groups that
                # don't have at least that many events.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            'times_seen_lower': CallbackCondition(
                # This condition represents the lower threshold for the
                # number of times an issue has been seen in an environment.
                # Since an issue can't be seen in an environment more times
                # than the issue was seen overall, we can safely exclude
                # any groups that haven't met that threshold.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            # The following conditions make a few assertions that are
            # correct in an abstract sense but may not accurately reflect
            # the existing implementation (see GH-5289). These assumptions
            # are that 1. The first seen time for a Group is the minimum
            # value of the first seen time for all of its GroupEnvironment
            # relations; 2. The last seen time for a Group is the maximum
            # value of the last seen time for all of its GroupEnvironment
            # relations; 3. The first seen time is always less than or
            # equal to the last seen time.
            'age_from': CallbackCondition(
                # This condition represents the lower threshold for "first
                # seen" time for an environment. Due to assertions #1 and
                # #3, we can exclude any groups where the "last seen" time
                # is prior to this timestamp.
                lambda queryset, first_seen: queryset.exclude(
                    last_seen__lt=first_seen,
                ),
            ),
            'age_to': CallbackCondition(
                # This condition represents the upper threshold for "first
                # seen" time for an environment. Due to assertion #1, we
                # can exclude any values where the group first seen is
                # greater than that threshold.
                lambda queryset, first_seen: queryset.exclude(
                    first_seen__gt=first_seen,
                ),
            ),
            'last_seen_from': CallbackCondition(
                # This condition represents the lower threshold for "last
                # seen" time for an environment. Due to assertion #2, we
                # can exclude any values where the group last seen value is
                # less than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    last_seen__lt=last_seen,
                ),
            ),
            'last_seen_to': CallbackCondition(
                # This condition represents the upper threshold for "last
                # seen" time for an environment. Due to assertions #2 and
                # #3, we can exclude any values where the group first seen
                # value is greater than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    first_seen__gt=last_seen,
                ),
            ),
        }).build(
            group_queryset.extra(
                where=[
                    '{} = {}'.format(
                        get_sql_column(Group, 'id'),
                        get_sql_column(GroupEnvironment, 'group_id'),
                    ),
                    '{} = %s'.format(
                        get_sql_column(GroupEnvironment, 'environment_id'),
                    ),
                ],
                params=[environment.id],
                tables=[GroupEnvironment._meta.db_table],
            ),
            parameters,
        )

        get_sort_expression, sort_value_to_cursor_value = environment_sort_strategies[sort_by]

        group_tag_value_queryset = tagstore.get_group_tag_value_qs(
            project_id=project.id,
            group_id=set(group_queryset.values_list('id', flat=True)[:10000]),
            environment_id=environment.id,
            key='environment',
            value=environment.name,
        )

        if retention_window_start is not None:
            group_tag_value_queryset = group_tag_value_queryset.filter(
                last_seen__gte=retention_window_start)

        candidates = dict(
            QuerySetBuilder({
                'age_from': ScalarCondition('first_seen', 'gt'),
                'age_to': ScalarCondition('first_seen', 'lt'),
                'last_seen_from': ScalarCondition('last_seen', 'gt'),
                'last_seen_to': ScalarCondition('last_seen', 'lt'),
                'times_seen': CallbackCondition(
                    lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
                ),
                'times_seen_lower': ScalarCondition('times_seen', 'gt'),
                'times_seen_upper': ScalarCondition('times_seen', 'lt'),
            }).build(
                group_tag_value_queryset,
                parameters,
            ).extra(
                select={
                    'sort_value': get_sort_expression(group_tag_value_queryset.model),
                },
            ).values_list('group_id', 'sort_value')
        )

        if tags:
            # TODO: `get_group_ids_for_search_filter` should be able to
            # utilize the retention window start parameter for additional
            # optimizations.
            matches = tagstore.get_group_ids_for_search_filter(
                project_id=project.id,
                environment_id=environment.id,
                tags=tags,
                candidates=candidates.keys(),
                limit=len(candidates),
            )
            for key in set(candidates) - set(matches or []):
                del candidates[key]

        result = SequencePaginator(
            [(sort_value_to_cursor_value(score), id) for (id, score) in candidates.items()],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=count_hits)

        groups = Group.objects.in_bulk(result.results)
        result.results = [groups[k] for k in result.results if k in groups]

        return result
    else:
        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('datetime', 'gt'),
            'date_to': ScalarCondition('datetime', 'lt'),
        })

        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            group_queryset = group_queryset.filter(
                id__in=list(
                    event_queryset_builder.build(
                        Event.objects.filter(project_id=project.id),
                        parameters,
                    ).distinct().values_list('group_id', flat=True)[:1000],
                )
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.filter(
                    first_release__organization_id=project.organization_id,
                    first_release__version=version,
                ),
            ),
            'age_from': ScalarCondition('first_seen', 'gt'),
            'age_to': ScalarCondition('first_seen', 'lt'),
            'last_seen_from': ScalarCondition('last_seen', 'gt'),
            'last_seen_to': ScalarCondition('last_seen', 'lt'),
            'times_seen': CallbackCondition(
                lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', 'gt'),
            'times_seen_upper': ScalarCondition('times_seen', 'lt'),
        }).build(
            group_queryset,
            parameters,
        ).extra(
            select={
                'sort_value': get_sort_clause(sort_by),
            },
        )

        if tags:
            group_ids = tagstore.get_group_ids_for_search_filter(
                project_id=project.id,
                environment_id=None,
                tags=tags,
                candidates=None,
            )
            if group_ids:
                group_queryset = group_queryset.filter(id__in=group_ids)
            else:
                group_queryset = group_queryset.none()

        paginator_cls, sort_clause = sort_strategies[sort_by]
        group_queryset = group_queryset.order_by(sort_clause)
        paginator = paginator_cls(group_queryset, sort_clause, **paginator_options)
        return paginator.get_result(limit, cursor, count_hits=count_hits)
def _query(self, projects, retention_window_start, group_queryset, tags, environments,
           sort_by, limit, cursor, count_hits, paginator_options, **parameters):
    from sentry.models import (Group, Environment, Event, GroupEnvironment, Release)

    # this backend only supports search within one project/environment
    if len(projects) != 1 or (environments is not None and len(environments) > 1):
        raise NotImplementedError

    project = projects[0]
    environment = environments[0] if environments is not None else environments

    if environment is not None:
        if 'environment' in tags:
            environment_name = tags.pop('environment')
            assert environment_name is ANY or Environment.objects.get(
                projects=project,
                name=environment_name,
            ).id == environment.id

        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('date_added', 'gt'),
            'date_to': ScalarCondition('date_added', 'lt'),
        })

        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            event_queryset = event_queryset_builder.build(
                tagstore.get_event_tag_qs(
                    project_id=project.id,
                    environment_id=environment.id,
                    key='environment',
                    value=environment.name,
                ),
                parameters,
            )
            if retention_window_start is not None:
                event_queryset = event_queryset.filter(date_added__gte=retention_window_start)

            group_queryset = group_queryset.filter(
                id__in=list(event_queryset.distinct().values_list('group_id', flat=True)[:1000])
            )

        _, group_queryset_sort_clause = sort_strategies[sort_by]

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.extra(
                    where=[
                        '{} = {}'.format(
                            get_sql_column(GroupEnvironment, 'first_release_id'),
                            get_sql_column(Release, 'id'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'organization'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'version'),
                        ),
                    ],
                    params=[project.organization_id, version],
                    tables=[Release._meta.db_table],
                ),
            ),
            'times_seen': CallbackCondition(
                # This condition represents the exact number of times that
                # an issue has been seen in an environment. Since an issue
                # can't be seen in an environment more times than the issue
                # was seen overall, we can safely exclude any groups that
                # don't have at least that many events.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            'times_seen_lower': CallbackCondition(
                # This condition represents the lower threshold for the
                # number of times an issue has been seen in an environment.
                # Since an issue can't be seen in an environment more times
                # than the issue was seen overall, we can safely exclude
                # any groups that haven't met that threshold.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            # The following conditions make a few assertions that are
            # correct in an abstract sense but may not accurately reflect
            # the existing implementation (see GH-5289). These assumptions
            # are that 1. The first seen time for a Group is the minimum
            # value of the first seen time for all of its GroupEnvironment
            # relations; 2. The last seen time for a Group is the maximum
            # value of the last seen time for all of its GroupEnvironment
            # relations; 3. The first seen time is always less than or
            # equal to the last seen time.
            'age_from': CallbackCondition(
                # This condition represents the lower threshold for "first
                # seen" time for an environment. Due to assertions #1 and
                # #3, we can exclude any groups where the "last seen" time
                # is prior to this timestamp.
                lambda queryset, first_seen: queryset.exclude(
                    last_seen__lt=first_seen,
                ),
            ),
            'age_to': CallbackCondition(
                # This condition represents the upper threshold for "first
                # seen" time for an environment. Due to assertion #1, we
                # can exclude any values where the group first seen is
                # greater than that threshold.
                lambda queryset, first_seen: queryset.exclude(
                    first_seen__gt=first_seen,
                ),
            ),
            'last_seen_from': CallbackCondition(
                # This condition represents the lower threshold for "last
                # seen" time for an environment. Due to assertion #2, we
                # can exclude any values where the group last seen value is
                # less than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    last_seen__lt=last_seen,
                ),
            ),
            'last_seen_to': CallbackCondition(
                # This condition represents the upper threshold for "last
                # seen" time for an environment. Due to assertions #2 and
                # #3, we can exclude any values where the group first seen
                # value is greater than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    first_seen__gt=last_seen,
                ),
            ),
        }).build(
            group_queryset.extra(
                where=[
                    '{} = {}'.format(
                        get_sql_column(Group, 'id'),
                        get_sql_column(GroupEnvironment, 'group_id'),
                    ),
                    '{} = %s'.format(
                        get_sql_column(GroupEnvironment, 'environment_id'),
                    ),
                ],
                params=[environment.id],
                tables=[GroupEnvironment._meta.db_table],
            ),
            parameters,
        ).order_by(group_queryset_sort_clause)

        get_sort_expression, sort_value_to_cursor_value = environment_sort_strategies[sort_by]

        group_tag_value_queryset = tagstore.get_group_tag_value_qs(
            project_id=project.id,
            group_id=set(group_queryset.values_list('id', flat=True)[:10000]),
            environment_id=environment.id,
            key='environment',
            value=environment.name,
        )

        if retention_window_start is not None:
            group_tag_value_queryset = group_tag_value_queryset.filter(
                last_seen__gte=retention_window_start
            )

        candidates = dict(
            QuerySetBuilder({
                'age_from': ScalarCondition('first_seen', 'gt'),
                'age_to': ScalarCondition('first_seen', 'lt'),
                'last_seen_from': ScalarCondition('last_seen', 'gt'),
                'last_seen_to': ScalarCondition('last_seen', 'lt'),
                'times_seen': CallbackCondition(
                    lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
                ),
                'times_seen_lower': ScalarCondition('times_seen', 'gt'),
                'times_seen_upper': ScalarCondition('times_seen', 'lt'),
            }).build(
                group_tag_value_queryset,
                parameters,
            ).extra(
                select={
                    'sort_value': get_sort_expression(group_tag_value_queryset.model),
                },
            ).values_list('group_id', 'sort_value')
        )

        if tags:
            # TODO: `get_group_ids_for_search_filter` should be able to
            # utilize the retention window start parameter for additional
            # optimizations.
            matches = tagstore.get_group_ids_for_search_filter(
                project_id=project.id,
                environment_id=environment.id,
                tags=tags,
                candidates=candidates.keys(),
                limit=len(candidates),
            )
            for key in set(candidates) - set(matches or []):
                del candidates[key]

        result = SequencePaginator(
            [(sort_value_to_cursor_value(score), id) for (id, score) in candidates.items()],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=count_hits)

        groups = Group.objects.in_bulk(result.results)
        result.results = [groups[k] for k in result.results if k in groups]

        return result
    else:
        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('datetime', 'gt'),
            'date_to': ScalarCondition('datetime', 'lt'),
        })

        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            group_queryset = group_queryset.filter(
                id__in=list(
                    event_queryset_builder.build(
                        Event.objects.filter(project_id=project.id),
                        parameters,
                    ).distinct().values_list('group_id', flat=True)[:1000],
                )
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.filter(
                    first_release__organization_id=project.organization_id,
                    first_release__version=version,
                ),
            ),
            'age_from': ScalarCondition('first_seen', 'gt'),
            'age_to': ScalarCondition('first_seen', 'lt'),
            'last_seen_from': ScalarCondition('last_seen', 'gt'),
            'last_seen_to': ScalarCondition('last_seen', 'lt'),
            'times_seen': CallbackCondition(
                lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', 'gt'),
            'times_seen_upper': ScalarCondition('times_seen', 'lt'),
        }).build(
            group_queryset,
            parameters,
        ).extra(
            select={
                'sort_value': get_sort_clause(sort_by),
            },
        )

        if tags:
            group_ids = tagstore.get_group_ids_for_search_filter(
                project_id=project.id,
                environment_id=None,
                tags=tags,
                candidates=None,
            )
            if group_ids:
                group_queryset = group_queryset.filter(id__in=group_ids)
            else:
                group_queryset = group_queryset.none()

        paginator_cls, sort_clause = sort_strategies[sort_by]
        group_queryset = group_queryset.order_by(sort_clause)
        paginator = paginator_cls(group_queryset, sort_clause, **paginator_options)
        return paginator.get_result(limit, cursor, count_hits=count_hits)
def test_simple(self):
    from sentry.tagstore.legacy.tasks import delete_tag_key

    team = self.create_team(name='test', slug='test')
    project = self.create_project(team=team, name='test1', slug='test1')
    group = self.create_group(project=project)
    key = 'foo'
    value = 'bar'

    tk = tagstore.create_tag_key(key=key, project_id=project.id)
    tagstore.create_tag_value(key=key, value=value, project_id=project.id)
    tagstore.create_group_tag_key(key=key, group_id=group.id, project_id=project.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group.id, project_id=project.id)
    tagstore.create_event_tag(
        key_id=tk.id,
        group_id=group.id,
        value_id=1,
        project_id=project.id,
        event_id=1,
    )

    project2 = self.create_project(team=team, name='test2')
    group2 = self.create_group(project=project2)
    tk2 = tagstore.create_tag_key(key=key, project_id=project2.id)
    tagstore.create_group_tag_key(key=key, group_id=group2.id, project_id=project2.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group2.id, project_id=project2.id)
    tagstore.create_event_tag(
        key_id=tk2.id,
        group_id=group2.id,
        value_id=1,
        project_id=project.id,
        event_id=1,
    )

    with self.tasks():
        delete_tag_key(object_id=tk.id)

    try:
        tagstore.get_group_tag_value(group.id, key, value)
        assert False  # verify exception thrown
    except tagstore.GroupTagValueNotFound:
        pass

    try:
        tagstore.get_group_tag_key(group.id, key)
        assert False  # verify exception thrown
    except tagstore.GroupTagKeyNotFound:
        pass

    try:
        tagstore.get_tag_value(project.id, key, value)
        assert False  # verify exception thrown
    except tagstore.TagValueNotFound:
        pass

    assert not tagstore.get_event_tag_qs(key_id=tk.id).exists()

    try:
        tagstore.get_tag_key(project.id, key)
        assert False  # verify exception thrown
    except tagstore.TagKeyNotFound:
        pass

    assert tagstore.get_tag_key(project2.id, key) is not None
    assert tagstore.get_group_tag_key(group2.id, key) is not None
    assert tagstore.get_group_tag_value(group2.id, key, value) is not None
    assert tagstore.get_event_tag_qs(key_id=tk2.id).exists()
def test_simple(self):
    team = self.create_team(name='test', slug='test')
    project = self.create_project(team=team, name='test1', slug='test1')
    group = self.create_group(project=project)
    key = 'foo'
    value = 'bar'

    tk = tagstore.create_tag_key(
        key=key, project_id=project.id, environment_id=self.environment.id)
    tagstore.create_tag_value(
        key=key, value=value, project_id=project.id, environment_id=self.environment.id)
    tagstore.create_group_tag_key(
        key=key, group_id=group.id, project_id=project.id,
        environment_id=self.environment.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group.id, project_id=project.id,
        environment_id=self.environment.id)
    tagstore.create_event_tags(
        group_id=group.id,
        project_id=project.id,
        event_id=1,
        tags=[(tk.id, 1)],
    )

    project2 = self.create_project(team=team, name='test2')
    group2 = self.create_group(project=project2)
    tk2 = tagstore.create_tag_key(project2.id, self.environment.id, key)
    tagstore.create_group_tag_key(
        key=key, group_id=group2.id, project_id=project2.id,
        environment_id=self.environment.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group2.id, project_id=project2.id,
        environment_id=self.environment.id)
    tagstore.create_event_tags(
        group_id=group2.id,
        project_id=project.id,
        event_id=1,
        tags=[(tk2.id, 1)],
    )

    deletion = ScheduledDeletion.schedule(tk, days=0)
    deletion.update(in_progress=True)

    with self.tasks():
        run_deletion(deletion.id)

    try:
        tagstore.get_group_tag_value(group.id, key, value)
        assert False  # verify exception thrown
    except tagstore.GroupTagValueNotFound:
        pass

    try:
        tagstore.get_group_tag_key(group.id, key)
        assert False  # verify exception thrown
    except tagstore.GroupTagKeyNotFound:
        pass

    try:
        tagstore.get_tag_value(project.id, key, value)
        assert False  # verify exception thrown
    except tagstore.TagValueNotFound:
        pass

    try:
        tagstore.get_tag_key(project.id, key)
        assert False  # verify exception thrown
    except tagstore.TagKeyNotFound:
        pass

    assert tagstore.get_tag_key(project2.id, key) is not None
    assert tagstore.get_group_tag_key(group2.id, key) is not None
    assert tagstore.get_group_tag_value(group2.id, key, value) is not None
    assert tagstore.get_event_tag_qs(key_id=tk.id).exists()
    assert tagstore.get_event_tag_qs(key_id=tk2.id).exists()
def query(self, project, tags=None, environment=None, sort_by='date', limit=100,
          cursor=None, count_hits=False, paginator_options=None, **parameters):
    from sentry.models import (
        Environment, Event, Group, GroupEnvironment, GroupStatus, GroupSubscription, Release
    )

    if paginator_options is None:
        paginator_options = {}

    if tags is None:
        tags = {}

    try:
        if tags.get('sentry:release') == 'latest':
            tags['sentry:release'] = get_latest_release(project, environment)

        if parameters.get('first_release') == 'latest':
            parameters['first_release'] = get_latest_release(project, environment)
    except Release.DoesNotExist:
        # no matches could possibly be found from this point on
        return Paginator(Group.objects.none()).get_result()

    group_queryset = QuerySetBuilder({
        'query': CallbackCondition(
            lambda queryset, query: queryset.filter(
                Q(message__icontains=query) | Q(culprit__icontains=query),
            ) if query else queryset,
        ),
        'status': CallbackCondition(
            lambda queryset, status: queryset.filter(status=status),
        ),
        'bookmarked_by': CallbackCondition(
            lambda queryset, user: queryset.filter(
                bookmark_set__project=project,
                bookmark_set__user=user,
            ),
        ),
        'assigned_to': CallbackCondition(
            functools.partial(assigned_to_filter, project=project),
        ),
        'unassigned': CallbackCondition(
            lambda queryset, unassigned: queryset.filter(
                assignee_set__isnull=unassigned,
            ),
        ),
        'subscribed_by': CallbackCondition(
            lambda queryset, user: queryset.filter(
                id__in=GroupSubscription.objects.filter(
                    project=project,
                    user=user,
                    is_active=True,
                ).values_list('group'),
            ),
        ),
        'active_at_from': ScalarCondition('active_at', 'gt'),
        'active_at_to': ScalarCondition('active_at', 'lt'),
    }).build(
        Group.objects.filter(project=project).exclude(status__in=[
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        ]),
        parameters,
    )

    # filter out groups which are beyond the retention period
    retention = quotas.get_event_retention(organization=project.organization)
    if retention:
        retention_window_start = timezone.now() - timedelta(days=retention)
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)
    else:
        retention_window_start = None

    if environment is not None:
        if 'environment' in tags:
            # TODO: This should probably just overwrite the existing tag,
            # rather than asserting on it, but...?
            assert Environment.objects.get(
                projects=project,
                name=tags.pop('environment'),
            ).id == environment.id

        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('date_added', 'gt'),
            'date_to': ScalarCondition('date_added', 'lt'),
        })

        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            event_queryset = event_queryset_builder.build(
                tagstore.get_event_tag_qs(
                    project.id,
                    environment.id,
                    'environment',
                    environment.name,
                ),
                parameters,
            )
            if retention_window_start is not None:
                event_queryset = event_queryset.filter(date_added__gte=retention_window_start)

            group_queryset = group_queryset.filter(
                id__in=list(event_queryset.distinct().values_list('group_id', flat=True)[:1000])
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.extra(
                    where=[
                        '{} = {}'.format(
                            get_sql_column(GroupEnvironment, 'first_release_id'),
                            get_sql_column(Release, 'id'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'organization'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(Release, 'version'),
                        ),
                    ],
                    params=[project.organization_id, version],
                    tables=[Release._meta.db_table],
                ),
            ),
            'times_seen': CallbackCondition(
                # This condition represents the exact number of times that
                # an issue has been seen in an environment. Since an issue
                # can't be seen in an environment more times than the issue
                # was seen overall, we can safely exclude any groups that
                # don't have at least that many events.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            'times_seen_lower': CallbackCondition(
                # This condition represents the lower threshold for the
                # number of times an issue has been seen in an environment.
                # Since an issue can't be seen in an environment more times
                # than the issue was seen overall, we can safely exclude
                # any groups that haven't met that threshold.
                lambda queryset, times_seen: queryset.exclude(
                    times_seen__lt=times_seen,
                ),
            ),
            # The following conditions make a few assertions that are
            # correct in an abstract sense but may not accurately reflect
            # the existing implementation (see GH-5289). These assumptions
            # are that 1. The first seen time for a Group is the minimum
            # value of the first seen time for all of its GroupEnvironment
            # relations; 2. The last seen time for a Group is the maximum
            # value of the last seen time for all of its GroupEnvironment
            # relations; 3. The first seen time is always less than or
            # equal to the last seen time.
            'age_from': CallbackCondition(
                # This condition represents the lower threshold for "first
                # seen" time for an environment. Due to assertions #1 and
                # #3, we can exclude any groups where the "last seen" time
                # is prior to this timestamp.
                lambda queryset, first_seen: queryset.exclude(
                    last_seen__lt=first_seen,
                ),
            ),
            'age_to': CallbackCondition(
                # This condition represents the upper threshold for "first
                # seen" time for an environment. Due to assertion #1, we
                # can exclude any values where the group first seen is
                # greater than that threshold.
                lambda queryset, first_seen: queryset.exclude(
                    first_seen__gt=first_seen,
                ),
            ),
            'last_seen_from': CallbackCondition(
                # This condition represents the lower threshold for "last
                # seen" time for an environment. Due to assertion #2, we
                # can exclude any values where the group last seen value is
                # less than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    last_seen__lt=last_seen,
                ),
            ),
            'last_seen_to': CallbackCondition(
                # This condition represents the upper threshold for "last
                # seen" time for an environment. Due to assertions #2 and
                # #3, we can exclude any values where the group first seen
                # value is greater than that threshold.
                lambda queryset, last_seen: queryset.exclude(
                    first_seen__gt=last_seen,
                ),
            ),
        }).build(
            group_queryset.extra(
                where=[
                    '{} = {}'.format(
                        get_sql_column(Group, 'id'),
                        get_sql_column(GroupEnvironment, 'group_id'),
                    ),
                    '{} = %s'.format(
                        get_sql_column(GroupEnvironment, 'environment_id'),
                    ),
                ],
                params=[environment.id],
                tables=[GroupEnvironment._meta.db_table],
            ),
            parameters,
        )

        get_sort_expression, sort_value_to_cursor_value = environment_sort_strategies[sort_by]

        group_tag_value_queryset = tagstore.get_group_tag_value_qs(
            project.id,
            set(group_queryset.values_list('id', flat=True)),  # TODO: Limit?
            environment.id,
            'environment',
            environment.name,
        )

        if retention_window_start is not None:
            group_tag_value_queryset = group_tag_value_queryset.filter(
                last_seen__gte=retention_window_start)

        candidates = dict(
            QuerySetBuilder({
                'age_from': ScalarCondition('first_seen', 'gt'),
                'age_to': ScalarCondition('first_seen', 'lt'),
                'last_seen_from': ScalarCondition('last_seen', 'gt'),
                'last_seen_to': ScalarCondition('last_seen', 'lt'),
                'times_seen': CallbackCondition(
                    lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
                ),
                'times_seen_lower': ScalarCondition('times_seen', 'gt'),
                'times_seen_upper': ScalarCondition('times_seen', 'lt'),
            }).build(
                group_tag_value_queryset,
                parameters,
            ).extra(
                select={
                    'sort_value': get_sort_expression(group_tag_value_queryset.model),
                },
            ).values_list('group_id', 'sort_value')
        )

        if tags:
            # TODO: `get_group_ids_for_search_filter` should be able to
            # utilize the retention window start parameter for additional
            # optimizations.
            matches = tagstore.get_group_ids_for_search_filter(
                project.id,
                environment.id,
                tags,
                candidates.keys(),
                limit=len(candidates),
            )
            for key in set(candidates) - set(matches or []):
                del candidates[key]

        result = SequencePaginator(
            [(sort_value_to_cursor_value(score), id) for (id, score) in candidates.items()],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=count_hits)

        groups = Group.objects.in_bulk(result.results)
        result.results = [groups[k] for k in result.results if k in groups]

        return result
    else:
        event_queryset_builder = QuerySetBuilder({
            'date_from': ScalarCondition('datetime', 'gt'),
            'date_to': ScalarCondition('datetime', 'lt'),
        })

        if any(key in parameters for key in event_queryset_builder.conditions.keys()):
            group_queryset = group_queryset.filter(
                id__in=list(
                    event_queryset_builder.build(
                        Event.objects.filter(project_id=project.id),
                        parameters,
                    ).distinct().values_list('group_id', flat=True)[:1000],
                )
            )

        group_queryset = QuerySetBuilder({
            'first_release': CallbackCondition(
                lambda queryset, version: queryset.filter(
                    first_release__organization_id=project.organization_id,
                    first_release__version=version,
                ),
            ),
            'age_from': ScalarCondition('first_seen', 'gt'),
            'age_to': ScalarCondition('first_seen', 'lt'),
            'last_seen_from': ScalarCondition('last_seen', 'gt'),
            'last_seen_to': ScalarCondition('last_seen', 'lt'),
            'times_seen': CallbackCondition(
                lambda queryset, times_seen: queryset.filter(times_seen=times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', 'gt'),
            'times_seen_upper': ScalarCondition('times_seen', 'lt'),
        }).build(
            group_queryset,
            parameters,
        ).extra(
            select={
                'sort_value': get_sort_clause(sort_by),
            },
        )

        if tags:
            matches = tagstore.get_group_ids_for_search_filter(project.id, None, tags)
            if matches:
                group_queryset = group_queryset.filter(id__in=matches)
            else:
                group_queryset = group_queryset.none()

        paginator_cls, sort_clause = sort_strategies[sort_by]
        group_queryset = group_queryset.order_by(sort_clause)
        paginator = paginator_cls(group_queryset, sort_clause, **paginator_options)
        return paginator.get_result(limit, cursor, count_hits=count_hits)
def test_simple(self):
    team = self.create_team(name='test', slug='test')
    project = self.create_project(team=team, name='test1', slug='test1')
    group = self.create_group(project=project)
    key = 'foo'
    value = 'bar'

    tk = tagstore.create_tag_key(
        key=key, project_id=project.id, environment_id=self.environment.id)
    tagstore.create_tag_value(
        key=key, value=value, project_id=project.id, environment_id=self.environment.id)
    tagstore.create_group_tag_key(
        key=key, group_id=group.id, project_id=project.id,
        environment_id=self.environment.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group.id, project_id=project.id,
        environment_id=self.environment.id)
    tagstore.create_event_tags(
        group_id=group.id,
        project_id=project.id,
        event_id=1,
        environment_id=self.environment.id,
        tags=[(tk.id, 1)],
    )

    project2 = self.create_project(team=team, name='test2')
    group2 = self.create_group(project=project2)
    tk2 = tagstore.create_tag_key(project2.id, self.environment.id, key)
    tagstore.create_group_tag_key(
        key=key, group_id=group2.id, project_id=project2.id,
        environment_id=self.environment.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group2.id, project_id=project2.id,
        environment_id=self.environment.id)
    tagstore.create_event_tags(
        group_id=group2.id,
        project_id=project.id,
        environment_id=self.environment.id,
        event_id=1,
        tags=[(tk2.id, 1)],
    )

    deletion = ScheduledDeletion.schedule(tk, days=0)
    deletion.update(in_progress=True)

    with self.tasks():
        run_deletion(deletion.id)

    try:
        tagstore.get_group_tag_value(group.project_id, group.id, None, key, value)
        assert False  # verify exception thrown
    except tagstore.GroupTagValueNotFound:
        pass

    try:
        tagstore.get_group_tag_key(group.project_id, group.id, None, key)
        assert False  # verify exception thrown
    except tagstore.GroupTagKeyNotFound:
        pass

    try:
        tagstore.get_tag_value(project.id, None, key, value)
        assert False  # verify exception thrown
    except tagstore.TagValueNotFound:
        pass

    try:
        tagstore.get_tag_key(project.id, None, key)
        assert False  # verify exception thrown
    except tagstore.TagKeyNotFound:
        pass

    assert tagstore.get_tag_key(project2.id, None, key) is not None
    assert tagstore.get_group_tag_key(group2.project_id, group2.id, None, key) is not None
    assert tagstore.get_group_tag_value(group2.project_id, group2.id, None, key, value) is not None
    assert tagstore.get_event_tag_qs(key_id=tk.id).exists()
    assert tagstore.get_event_tag_qs(key_id=tk2.id).exists()
def migrate_events(caches, project, source_id, destination_id, fingerprints, events, actor_id):
    # XXX: This is only actually able to create a destination group and migrate
    # the group hashes if there are events that can be migrated. How do we
    # handle this if there aren't any events? We can't create a group (there
    # isn't any data to derive the aggregates from), so we'd have to mark the
    # hash as in limbo somehow...?)
    if not events:
        return destination_id

    if destination_id is None:
        # XXX: There is a race condition here between the (wall clock) time
        # that the migration is started by the user and when we actually
        # get to this block where the new destination is created and we've
        # moved the ``GroupHash`` so that events start being associated
        # with it. During this gap, there could have been additional events
        # ingested, and if we want to handle this, we'd need to record the
        # highest event ID we've seen at the beginning of the migration,
        # then scan all events greater than that ID and migrate the ones
        # where necessary. (This still isn't even guaranteed to catch all
        # of the events due to processing latency, but it's a better shot.)

        # Create a new destination group.
        destination = Group.objects.create(
            project_id=project.id,
            short_id=project.next_short_id(),
            **get_group_creation_attributes(caches, events)
        )
        destination_id = destination.id

        # Move the group hashes to the destination.
        GroupHash.objects.filter(
            project_id=project.id,
            hash__in=fingerprints,
        ).update(group=destination_id)

        # Create activity records for the source and destination group.
        Activity.objects.create(
            project_id=project.id,
            group_id=destination_id,
            type=Activity.UNMERGE_DESTINATION,
            user_id=actor_id,
            data={
                'fingerprints': fingerprints,
                'source_id': source_id,
            },
        )
        Activity.objects.create(
            project_id=project.id,
            group_id=source_id,
            type=Activity.UNMERGE_SOURCE,
            user_id=actor_id,
            data={
                'fingerprints': fingerprints,
                'destination_id': destination_id,
            },
        )
    else:
        # Update the existing destination group.
        destination = Group.objects.get(id=destination_id)
        destination.update(**get_group_backfill_attributes(caches, destination, events))

    event_id_set = set(event.id for event in events)

    Event.objects.filter(
        project_id=project.id,
        id__in=event_id_set,
    ).update(group_id=destination_id)

    for event in events:
        event.group = destination

    tagstore.get_event_tag_qs(
        project_id=project.id,
        event_id__in=event_id_set,
    ).update(group_id=destination_id)

    event_event_id_set = set(event.event_id for event in events)

    EventMapping.objects.filter(
        project_id=project.id,
        event_id__in=event_event_id_set,
    ).update(group_id=destination_id)

    UserReport.objects.filter(
        project_id=project.id,
        event_id__in=event_event_id_set,
    ).update(group=destination_id)

    return destination.id
def test_simple(self):
    from sentry.tagstore.legacy.tasks import delete_tag_key

    team = self.create_team(name='test', slug='test')
    project = self.create_project(team=team, name='test1', slug='test1')
    group = self.create_group(project=project)
    key = 'foo'
    value = 'bar'

    tk = tagstore.create_tag_key(
        key=key, project_id=project.id, environment_id=self.environment.id)
    tagstore.create_tag_value(
        key=key, value=value, project_id=project.id, environment_id=self.environment.id)
    tagstore.create_group_tag_key(
        key=key, group_id=group.id, project_id=project.id,
        environment_id=self.environment.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group.id, project_id=project.id,
        environment_id=self.environment.id)
    tagstore.create_event_tags(
        group_id=group.id,
        project_id=project.id,
        event_id=1,
        tags=[(tk.id, 1)],
    )

    project2 = self.create_project(team=team, name='test2')
    group2 = self.create_group(project=project2)
    tk2 = tagstore.create_tag_key(
        key=key, project_id=project2.id, environment_id=self.environment.id)
    tagstore.create_group_tag_key(
        key=key, group_id=group2.id, project_id=project2.id,
        environment_id=self.environment.id)
    tagstore.create_group_tag_value(
        key=key, value=value, group_id=group2.id, project_id=project2.id,
        environment_id=self.environment.id)
    tagstore.create_event_tags(
        group_id=group2.id,
        project_id=project.id,
        event_id=1,
        tags=[(tk2.id, 1)],
    )

    with self.tasks():
        delete_tag_key(object_id=tk.id)

    assert tagstore.get_event_tag_qs(key_id=tk.id).exists()

    try:
        tagstore.get_group_tag_value(group.id, key, value)
        assert False  # verify exception thrown
    except tagstore.GroupTagValueNotFound:
        pass

    try:
        tagstore.get_group_tag_key(group.id, key)
        assert False  # verify exception thrown
    except tagstore.GroupTagKeyNotFound:
        pass

    try:
        tagstore.get_tag_value(project.id, key, value)
        assert False  # verify exception thrown
    except tagstore.TagValueNotFound:
        pass

    try:
        tagstore.get_tag_key(project.id, key)
        assert False  # verify exception thrown
    except tagstore.TagKeyNotFound:
        pass

    assert tagstore.get_tag_key(project2.id, key) is not None
    assert tagstore.get_group_tag_key(group2.id, key) is not None
    assert tagstore.get_group_tag_value(group2.id, key, value) is not None
    assert tagstore.get_event_tag_qs(key_id=tk2.id).exists()