def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({"event_id": event_id, "project": project_id})

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    if not features.has("organizations:event-attachments", project.organization, actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    # Attachments may be uploaded for events that already exist. Fetch the
    # existing group_id, so that the attachment can be fetched by group-level
    # APIs. This is inherently racy.
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(event_ids=[event_id], project_ids=[project.id]), limit=1
    )

    group_id = None
    if events:
        group_id = events[0].group_id

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment
    )
    if attachment.type != "event.attachment":
        logger.exception("invalid individual attachment type: %s", attachment.type)
        return

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )

    try:
        data = attachment.data
    except MissingAttachmentChunks:
        logger.exception("Missing chunks for cache_key=%s", cache_key)
        return

    file.putfile(BytesIO(data))
    EventAttachment.objects.create(
        project_id=project.id,
        group_id=group_id,
        event_id=event_id,
        name=attachment.name,
        file=file,
    )

    attachment.delete()
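# For reference, a hypothetical example of the kind of `message` this handler
# consumes, inferred only from the keys it reads above; the actual
# producer-side payload (the field names inside "attachment", value formats)
# may differ:
example_message = {
    "event_id": "2f41330fa1a24c9ea5f1c8b1092ec40d",
    "project_id": "42",  # arrives as a string; cast with int() above
    "attachment": {
        "attachment_type": "event.attachment",  # popped and passed as type=
        # Any remaining keys are forwarded verbatim to
        # attachment_cache.get_from_chunks via **attachment; these are guesses:
        "id": "some-chunk-upload-id",
        "name": "screenshot.png",
        "content_type": "image/png",
        "chunks": 3,
    },
}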
def reprocess_group(project_id, group_id, offset=0, start_time=None):
    if start_time is None:
        start_time = time.time()

    events = list(
        eventstore.get_unfetched_events(
            eventstore.Filter(
                project_ids=[project_id],
                group_ids=[group_id],
                # XXX: received?
                conditions=[["timestamp", "<", to_datetime(start_time)]],
            ),
            limit=GROUP_REPROCESSING_CHUNK_SIZE,
            offset=offset,
            referrer="reprocessing2.reprocess_group",
        )
    )

    if not events:
        return

    for event in events:
        reprocess_event.delay(
            project_id=project_id,
            event_id=event.event_id,
            start_time=start_time,
        )

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        offset=offset + len(events),
        start_time=start_time,
    )
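# The task schedules itself with an advanced offset until a fetch comes back
# empty. A minimal, self-contained sketch of that paginate-and-reschedule
# pattern, with a plain recursive call standing in for Celery's .delay and
# all names purely illustrative:
CHUNK_SIZE = 2

def fetch_page(items, offset, limit):
    # Stand-in for eventstore.get_unfetched_events with limit/offset.
    return items[offset : offset + limit]

def process_in_chunks(items, offset=0):
    page = fetch_page(items, offset, CHUNK_SIZE)
    if not page:
        return  # termination condition: an empty page ends the recursion
    for item in page:
        pass  # stand-in for reprocess_event.delay(...)
    # Re-enqueue with the offset advanced past this page.
    process_in_chunks(items, offset=offset + len(page))

process_in_chunks(["a", "b", "c", "d", "e"])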
def reprocess_group(
    project_id, group_id, offset=0, start_time=None, max_events=None, acting_user_id=None
):
    from sentry.reprocessing2 import start_group_reprocessing

    if start_time is None:
        # First invocation only: record the start time and initialize the
        # reprocessing state. Recursive invocations pass start_time through,
        # so this must not run again (it would otherwise be re-triggered on
        # every chunk, with an already-decremented max_events).
        start_time = time.time()
        start_group_reprocessing(
            project_id, group_id, max_events=max_events, acting_user_id=acting_user_id
        )

    if max_events is not None and max_events <= 0:
        events = []
    else:
        limit = GROUP_REPROCESSING_CHUNK_SIZE
        if max_events is not None:
            limit = min(limit, max_events)

        events = list(
            eventstore.get_unfetched_events(
                eventstore.Filter(
                    project_ids=[project_id],
                    group_ids=[group_id],
                ),
                limit=limit,
                orderby=["-timestamp"],
                offset=offset,
                referrer="reprocessing2.reprocess_group",
            )
        )

    if not events:
        wait_group_reprocessed.delay(project_id=project_id, group_id=group_id)
        return

    for event in events:
        reprocess_event.delay(
            project_id=project_id,
            event_id=event.event_id,
            start_time=start_time,
        )

    if max_events is not None:
        max_events -= len(events)

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        offset=offset + len(events),
        start_time=start_time,
        max_events=max_events,
    )
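# Worked example of the max_events accounting above (the chunk-size value is
# assumed for illustration; the real GROUP_REPROCESSING_CHUNK_SIZE may differ):
#   GROUP_REPROCESSING_CHUNK_SIZE = 500, max_events = 1200
#   call 1: limit = min(500, 1200) -> 500 events, max_events becomes 700
#   call 2: limit = min(500, 700)  -> 500 events, max_events becomes 200
#   call 3: limit = min(500, 200)  -> 200 events, max_events becomes 0
#   call 4: max_events <= 0 -> no events fetched, wait_group_reprocessed is
#           scheduled and the recursion stops.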
def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({"event_id": event_id, "project": project_id})

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    if not features.has("organizations:event-attachments", project.organization, actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    # Attachments may be uploaded for events that already exist. Fetch the
    # existing group_id, so that the attachment can be fetched by group-level
    # APIs. This is inherently racy.
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(event_ids=[event_id], project_ids=[project.id]), limit=1
    )

    group_id = None
    if events:
        group_id = events[0].group_id

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment
    )
    if attachment.type != "event.attachment":
        logger.exception("invalid individual attachment type: %s", attachment.type)
        return

    save_attachment(
        cache_key,
        attachment,
        project,
        event_id,
        key_id=None,  # TODO: Inject this from Relay
        group_id=group_id,
        start_time=None,  # TODO: Inject this from Relay
    )

    attachment.delete()
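# A hypothetical sketch of what the save_attachment helper might look like,
# reconstructed from the logic inlined in the first version of this function
# above; the real signature and its use of key_id/start_time may differ.
# Named save_attachment_sketch here to avoid shadowing the real helper:
def save_attachment_sketch(cache_key, attachment, project, event_id, key_id, group_id, start_time):
    # Bail out before creating any rows if the chunked payload is incomplete.
    try:
        data = attachment.data
    except MissingAttachmentChunks:
        logger.exception("Missing chunks for cache_key=%s", cache_key)
        return

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )
    file.putfile(BytesIO(data))

    EventAttachment.objects.create(
        project_id=project.id,
        group_id=group_id,
        event_id=event_id,
        name=attachment.name,
        file=file,
    )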
def chunk(self):
    conditions = []
    if self.last_event is not None:
        conditions.extend(
            [
                ["timestamp", "<=", self.last_event.timestamp],
                [
                    ["timestamp", "<", self.last_event.timestamp],
                    ["event_id", "<", self.last_event.event_id],
                ],
            ]
        )

    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(
            conditions=conditions, project_ids=[self.project_id], group_ids=[self.group_id]
        ),
        limit=self.DEFAULT_CHUNK_SIZE,
        referrer="deletions.group",
        orderby=["-timestamp", "-event_id"],
    )
    if not events:
        return False

    self.last_event = events[-1]

    # Remove from nodestore
    node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
    nodestore.delete_multi(node_ids)

    from sentry.reprocessing2 import delete_unprocessed_events

    delete_unprocessed_events(events)

    # Remove EventAttachment and UserReport *again* as those may not have a
    # group ID, therefore there may be dangling ones after "regular" model
    # deletion.
    event_ids = [event.event_id for event in events]
    models.EventAttachment.objects.filter(
        event_id__in=event_ids, project_id=self.project_id
    ).delete()
    models.UserReport.objects.filter(
        event_id__in=event_ids, project_id=self.project_id
    ).delete()

    return True
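# The cursor conditions above implement keyset pagination over the
# (timestamp, event_id) sort key: in this condition format, top-level entries
# are ANDed while a nested list of conditions is ORed, so the pair reads
# "timestamp <= last AND (timestamp < last OR event_id < last)". In plain
# Python (illustrative names only), that is the lexicographic tuple comparison:
def after_cursor(ts, event_id, last_ts, last_event_id):
    return ts <= last_ts and (ts < last_ts or event_id < last_event_id)

def after_cursor_tuple(ts, event_id, last_ts, last_event_id):
    return (ts, event_id) < (last_ts, last_event_id)

# Both predicates select rows strictly after the last-seen row in
# (-timestamp, -event_id) order, e.g.:
assert after_cursor(1, "b", 2, "a") == after_cursor_tuple(1, "b", 2, "a") == True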
def chunk(self):
    conditions = []
    if self.last_event is not None:
        conditions.extend(
            [
                ["timestamp", "<=", self.last_event.timestamp],
                [
                    ["timestamp", "<", self.last_event.timestamp],
                    ["event_id", "<", self.last_event.event_id],
                ],
            ]
        )

    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(
            conditions=conditions, project_ids=[self.project_id], group_ids=[self.group_id]
        ),
        limit=self.DEFAULT_CHUNK_SIZE,
        referrer="deletions.group",
        orderby=["-timestamp", "-event_id"],
    )
    if not events:
        return False

    self.last_event = events[-1]

    # Remove from nodestore
    node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
    nodestore.delete_multi(node_ids)

    delete_unprocessed_events(events)

    # Remove EventAttachment and UserReport *again* as those may not have a
    # group ID, therefore there may be dangling ones after "regular" model
    # deletion.
    event_ids = [event.event_id for event in events]
    EventAttachment.objects.filter(event_id__in=event_ids, project_id=self.project_id).delete()
    UserReport.objects.filter(event_id__in=event_ids, project_id=self.project_id).delete()

    return True