def test_missing_latest_event(self):
    user = self.create_user()
    group = self.create_group()
    hash = GroupHash.objects.create(
        project=group.project,
        group=group,
        hash='xyz',
    )
    # Record an event id that doesn't correspond to any stored event.
    GroupHash.record_last_processed_event_id(
        hash.id,
        'invalid',
    )
    result = serialize(hash, user=user)
    assert result['latestEvent'] is None
def test_fetch_and_record_last_processed_event_id(self):
    group = self.group
    grouphash = GroupHash.objects.create(
        project=group.project,
        group=group,
        hash='xyz',
    )
    GroupHash.record_last_processed_event_id(
        grouphash.id,
        'event',
    )
    assert GroupHash.fetch_last_processed_event_id(
        [grouphash.id, -1],
    ) == ['event', None]
def test_valid_latest_event(self):
    user = self.create_user()
    group = self.create_group()
    hash = GroupHash.objects.create(
        project=group.project,
        group=group,
        hash='xyz',
    )
    event = Event.objects.get(id=self.create_event(group=group).id)
    GroupHash.record_last_processed_event_id(
        hash.id,
        event.event_id,
    )
    result = serialize(hash, user=user)
    assert result['latestEvent'] == serialize(event, user=user)
def test_mismatched_latest_event(self):
    user = self.create_user()
    group = self.create_group()
    hash = GroupHash.objects.create(
        project=group.project,
        group=group,
        hash='xyz',
    )
    event = self.create_event(group=self.create_group())
    GroupHash.record_last_processed_event_id(
        group.project_id,
        [hash.id],
        event.event_id,
    )
    result = serialize(hash, user=user)
    assert result['latestEvent'] is None
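# The tests above exercise GroupHash.record_last_processed_event_id /
# fetch_last_processed_event_id: record remembers the id of the last event
# written for a hash, and fetch returns one event id (or None) per requested
# hash id. The snippet below is a minimal, hypothetical sketch of what a
# Redis-backed pair of helpers with that behavior could look like -- the key
# format, TTL, and client wiring are illustrative assumptions, not the
# project's actual implementation.
import redis

client = redis.Redis()


def _last_processed_event_id_key(grouphash_id):
    # Hypothetical key scheme: one string key per GroupHash id.
    return 'gh:{}:latest-event-id'.format(grouphash_id)


def record_last_processed_event_id(grouphash_id, event_id):
    # Remember the most recent event id written for this hash, with a bounded
    # lifetime so stale entries eventually fall out of the cache.
    client.set(_last_processed_event_id_key(grouphash_id), event_id, ex=60 * 60 * 24 * 30)


def fetch_last_processed_event_id(grouphash_ids):
    # Return one event id (or None for misses) per requested hash id,
    # preserving the order of the input list.
    return [
        value.decode('utf-8') if value is not None else None
        for value in client.mget([_last_processed_event_id_key(id) for id in grouphash_ids])
    ]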
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    existing_group_id = None
    for h in all_hashes:
        if h.group_id is not None:
            existing_group_id = h.group_id
            break
        if h.group_tombstone_id is not None:
            raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id)

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])

        # it's possible the release was deleted between
        # when we queried for the release and now, so
        # make sure it still exists
        first_release = kwargs.pop('first_release', None)

        with transaction.atomic():
            short_id = project.next_short_id()
            group, group_is_new = Group.objects.create(
                project=project,
                short_id=short_id,
                first_release_id=Release.objects.filter(
                    id=first_release.id,
                ).values_list('id', flat=True).first() if first_release else None,
                **kwargs
            ), True

        metrics.incr(
            'group.created',
            skip_internal=True,
            tags={'platform': event.platform or 'unknown'}
        )
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # Keep a set of all of the hashes that are relevant for this event and
    # belong to the destination group so that we can record this as the
    # last processed event for each. (We can't just update every
    # ``GroupHash`` instance, since we only want to record this for events
    # that not only include the hash but were also placed into the
    # associated group.)
    relevant_group_hashes = set(
        [instance for instance in all_hashes if instance.group_id == group.id]
    )

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h for h in all_hashes if h.group_id is None]
    if new_hashes:
        # XXX: There is a race condition here wherein another process could
        # create a new group that is associated with one of the new hashes,
        # add some event(s) to it, and then subsequently have the hash
        # "stolen" by this process. This then "orphans" those events from
        # their "siblings" in the group we've created here. We don't have a
        # way to fix this, since we can't call `_ensure_hashes_merged`
        # without filtering on `group_id` (which we can't do due to query
        # planner weirdness.) For more context, see 84c6f75a and d0e22787,
        # as well as GH-5085.
        GroupHash.objects.filter(
            id__in=[h.id for h in new_hashes],
        ).exclude(
            state=GroupHash.State.LOCKED_IN_MIGRATION,
        ).update(group=group)

        if group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

        # XXX: This can lead to invalid results due to a race condition and
        # lack of referential integrity enforcement, see above comment(s)
        # about "hash stealing".
        relevant_group_hashes.update(new_hashes)

    # XXX(dcramer): it's important this gets called **before** the aggregate
    # is processed as otherwise values like last_seen will get mutated
    can_sample = (
        features.has('projects:sample-events', project=project) and
        should_sample(
            event.data.get('received') or float(event.datetime.strftime('%s')),
            group.data.get('last_received') or float(group.last_seen.strftime('%s')),
            group.times_seen,
        )
    )

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group,
            event=event,
            data=kwargs,
            release=release,
        )
    else:
        is_regression = False

    # Determine if we've sampled enough data to store this event
    if is_new or is_regression:
        is_sample = False
    else:
        is_sample = can_sample

    if not is_sample:
        GroupHash.record_last_processed_event_id(
            project.id,
            [h.id for h in relevant_group_hashes],
            event.event_id,
        )

    return group, is_new, is_regression, is_sample
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    existing_group_id = None
    for h in all_hashes:
        if h.group_id is not None:
            existing_group_id = h.group_id
            break
        if h.group_tombstone_id is not None:
            raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id)

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])

        # it's possible the release was deleted between
        # when we queried for the release and now, so
        # make sure it still exists
        first_release = kwargs.pop('first_release', None)

        with transaction.atomic():
            short_id = project.next_short_id()
            group, group_is_new = Group.objects.create(
                project=project,
                short_id=short_id,
                first_release_id=Release.objects.filter(
                    id=first_release.id,
                ).values_list('id', flat=True).first() if first_release else None,
                **kwargs
            ), True

        metrics.incr(
            'group.created',
            skip_internal=True,
            tags={'platform': event.platform or 'unknown'}
        )
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h for h in all_hashes if h.group_id is None]
    if new_hashes:
        # XXX: There is a race condition here wherein another process could
        # create a new group that is associated with one of the new hashes,
        # add some event(s) to it, and then subsequently have the hash
        # "stolen" by this process. This then "orphans" those events from
        # their "siblings" in the group we've created here. We don't have a
        # way to fix this, since we can't call `_ensure_hashes_merged`
        # without filtering on `group_id` (which we can't do due to query
        # planner weirdness.) For more context, see 84c6f75a and d0e22787,
        # as well as GH-5085.
        GroupHash.objects.filter(
            id__in=[h.id for h in new_hashes],
        ).exclude(
            state=GroupHash.State.LOCKED_IN_MIGRATION,
        ).update(group=group)

        if group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

    # XXX(dcramer): it's important this gets called **before** the aggregate
    # is processed as otherwise values like last_seen will get mutated
    can_sample = (
        features.has('projects:sample-events', project=project) and
        should_sample(
            event.data.get('received') or float(event.datetime.strftime('%s')),
            group.data.get('last_received') or float(group.last_seen.strftime('%s')),
            group.times_seen,
        )
    )

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group,
            event=event,
            data=kwargs,
            release=release,
        )
    else:
        is_regression = False

    # Determine if we've sampled enough data to store this event
    if is_new or is_regression:
        is_sample = False
    else:
        is_sample = can_sample

    if not is_sample:
        GroupHash.record_last_processed_event_id(
            all_hashes[0].id,
            event.event_id,
        )

    return group, is_new, is_regression, is_sample
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    try:
        existing_group_id = six.next(
            h.group_id for h in all_hashes if h.group_id is not None
        )
    except StopIteration:
        existing_group_id = None

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])
        with transaction.atomic():
            short_id = project.next_short_id()
            group, group_is_new = Group.objects.create(
                project=project,
                short_id=short_id,
                **kwargs
            ), True
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # Keep a set of all of the hashes that are relevant for this event and
    # belong to the destination group so that we can record this as the
    # last processed event for each. (We can't just update every
    # ``GroupHash`` instance, since we only want to record this for events
    # that not only include the hash but were also placed into the
    # associated group.)
    relevant_group_hashes = set([
        instance for instance in all_hashes if instance.group_id == group.id
    ])

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h for h in all_hashes if h.group_id is None]
    if new_hashes:
        # XXX: There is a race condition here wherein another process could
        # create a new group that is associated with one of the new hashes,
        # add some event(s) to it, and then subsequently have the hash
        # "stolen" by this process. This then "orphans" those events from
        # their "siblings" in the group we've created here. We don't have a
        # way to fix this, since we can't call `_ensure_hashes_merged`
        # without filtering on `group_id` (which we can't do due to query
        # planner weirdness.) For more context, see 84c6f75a and d0e22787,
        # as well as GH-5085.
        GroupHash.objects.filter(
            id__in=[h.id for h in new_hashes],
        ).update(group=group)

        if group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

        # XXX: This can lead to invalid results due to a race condition and
        # lack of referential integrity enforcement, see above comment(s)
        # about "hash stealing".
        relevant_group_hashes.update(new_hashes)

    # XXX(dcramer): it's important this gets called **before** the aggregate
    # is processed as otherwise values like last_seen will get mutated
    can_sample = (
        features.has('projects:sample-events', project=project) and
        should_sample(
            event.data.get('received') or float(event.datetime.strftime('%s')),
            group.data.get('last_received') or float(group.last_seen.strftime('%s')),
            group.times_seen,
        )
    )

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group,
            event=event,
            data=kwargs,
            release=release,
        )
    else:
        is_regression = False

    # Determine if we've sampled enough data to store this event
    if is_new or is_regression:
        is_sample = False
    else:
        is_sample = can_sample

    if not is_sample:
        GroupHash.record_last_processed_event_id(
            project.id,
            [h.id for h in relevant_group_hashes],
            event.event_id,
        )

    return group, is_new, is_regression, is_sample