def process_message(
    self,
    message: Tuple[int, str, InsertEvent, Any],
    metadata: KafkaMessageMetadata,
) -> Optional[ProcessedMessage]:
    """Convert a raw version-2 message into a batch.

    The message is read as ``(2, type, data, [state])``:

    * ``"insert"`` messages are passed to ``self.process_insert`` and the
      resulting row is wrapped in an ``InsertBatch``.
    * Messages whose type is in ``REPLACEMENT_EVENT_TYPES`` are forwarded
      untouched in a ``ReplacementBatch`` keyed by the stringified
      ``project_id`` of the event.

    Returns ``None`` when ``process_insert`` raises ``EventTooOld`` or
    returns ``None``.

    Raises ``InvalidMessageVersion`` when the version is not 2 and
    ``InvalidMessageType`` when the type is neither an insert nor a known
    replacement type.
    """
    version = message[0]
    if version != 2:
        raise InvalidMessageVersion(f"Unsupported message version: {version}")

    # version 2: (2, type, data, [state])
    type_, event = message[1:3]

    if type_ == "insert":
        try:
            row = self.process_insert(event, metadata)
        except EventTooOld:
            return None
        # A missing row means the processor cannot/does not handle this input.
        return None if row is None else InsertBatch([row], None)

    if type_ in REPLACEMENT_EVENT_TYPES:
        # Pass the raw message along so it can be republished.
        return ReplacementBatch(str(event["project_id"]), [message])

    raise InvalidMessageType(f"Invalid message type: {type_}")
def process_message(self, message: ReplacementMessage) -> Optional[Replacement]:
    """Build the replacement described by ``message``, if it needs one.

    Dispatches on ``message.action_type``:

    * any of the ``start_*`` actions returns ``None``;
    * ``end_delete_groups``, ``end_merge``, ``end_unmerge`` and
      ``end_delete_tag`` are handed, together with ``message.data``, to the
      matching ``process_*`` helper, whose result is returned unchanged.

    Raises ``InvalidMessageType`` for every other action type.
    """
    action = message.action_type
    payload = message.data

    if action in (
        "start_delete_groups",
        "start_merge",
        "start_unmerge",
        "start_delete_tag",
    ):
        return None

    if action == "end_delete_groups":
        return process_delete_groups(payload, self.__required_columns)

    if action == "end_merge":
        return process_merge(payload, self.__all_column_names)

    if action == "end_unmerge":
        return process_unmerge(payload, self.__all_column_names)

    if action == "end_delete_tag":
        return process_delete_tag(
            payload,
            self.get_write_schema(),
            self.__tag_column_map,
            self.__promoted_tags,
        )

    raise InvalidMessageType("Invalid message type: {}".format(action))
def process_message(self, message: KafkaMessage) -> Optional[Replacement]:
    """Decode a Kafka message and build the replacement it describes.

    ``message.value`` is parsed as JSON; the decoded payload is read as
    ``[version, type, data, ...]`` and only version 2 is accepted.

    * any of the ``start_*`` types returns ``None``;
    * ``end_delete_groups``, ``end_merge``, ``end_unmerge`` and
      ``end_delete_tag`` are handed to the matching ``process_*`` helper,
      whose result is returned unchanged.

    Raises ``InvalidMessageVersion`` when the version is not 2 and
    ``InvalidMessageType`` when the type is not one of the above.
    """
    payload = json.loads(message.value)

    if payload[0] != 2:
        # The error text carries the decoded payload, not the Kafka wrapper.
        raise InvalidMessageVersion("Unknown message format: " + str(payload))

    kind, event = payload[1:3]

    if kind in (
        'start_delete_groups',
        'start_merge',
        'start_unmerge',
        'start_delete_tag',
    ):
        return None

    if kind == 'end_delete_groups':
        return process_delete_groups(event, self.__required_columns)

    if kind == 'end_merge':
        return process_merge(event, self.__all_column_names)

    if kind == 'end_unmerge':
        return process_unmerge(event, self.__all_column_names)

    if kind == 'end_delete_tag':
        return process_delete_tag(event, self.dataset)

    raise InvalidMessageType("Invalid message type: {}".format(kind))
def process_message(self, message: ReplacementMessage) -> Optional[Replacement]:
    """Log, count and translate a replacement message.

    The action type and ``message.data`` are logged at info level as a
    single JSON document. If the action type is in
    ``REPLACEMENT_EVENT_TYPES`` a ``"process"`` metric is incremented,
    tagged with the type and the consumer group from the message metadata.

    The message is then dispatched on its ``ReplacementType``:

    * every ``START_*`` type returns ``None``;
    * every other recognised type is handed (as the whole message) to its
      ``process_*`` helper or ``*.parse_message`` class method, whose
      result is returned unchanged.

    Raises ``InvalidMessageType`` for an unrecognised action type.
    """
    action = message.action_type

    logger.info(json.dumps({"message_type": action, **message.data}))

    if action in REPLACEMENT_EVENT_TYPES:
        metrics.increment(
            "process",
            1,
            tags={
                "type": action,
                "consumer_group": message.metadata.consumer_group,
            },
        )

    if action in (
        ReplacementType.START_DELETE_GROUPS,
        ReplacementType.START_MERGE,
        ReplacementType.START_UNMERGE,
        ReplacementType.START_UNMERGE_HIERARCHICAL,
        ReplacementType.START_DELETE_TAG,
    ):
        return None

    if action == ReplacementType.END_DELETE_GROUPS:
        return process_delete_groups(message, self.__required_columns)

    if action == ReplacementType.END_MERGE:
        return process_merge(message, self.__all_columns)

    if action == ReplacementType.END_UNMERGE:
        return UnmergeGroupsReplacement.parse_message(
            message, self.__replacement_context
        )

    if action == ReplacementType.END_UNMERGE_HIERARCHICAL:
        return process_unmerge_hierarchical(
            message, self.__all_columns, self.__state_name
        )

    if action == ReplacementType.END_DELETE_TAG:
        return process_delete_tag(
            message,
            self.__all_columns,
            self.__tag_column_map,
            self.__promoted_tags,
            self.__use_promoted_prewhere,
            self.__schema,
        )

    if action == ReplacementType.TOMBSTONE_EVENTS:
        return process_tombstone_events(
            message, self.__required_columns, self.__state_name
        )

    if action == ReplacementType.REPLACE_GROUP:
        return process_replace_group(
            message, self.__all_columns, self.__state_name
        )

    if action == ReplacementType.EXCLUDE_GROUPS:
        return ExcludeGroupsReplacement.parse_message(
            message, self.__replacement_context
        )

    raise InvalidMessageType("Invalid message type: {}".format(action))
def process_message(self, message: ReplacementMessage) -> Optional[Replacement]:
    """Count and translate a replacement message.

    If ``message.action_type`` is in ``REPLACEMENT_EVENT_TYPES`` a
    ``"process"`` metric tagged with the type is incremented. The message
    is then dispatched on its action type:

    * every ``start_*`` action returns ``None``;
    * every other recognised action hands ``message.data`` to its
      ``process_*`` helper or ``*.parse_message`` class method, whose
      result is returned unchanged.

    Raises ``InvalidMessageType`` for an unrecognised action type.
    """
    action = message.action_type
    payload = message.data

    if action in REPLACEMENT_EVENT_TYPES:
        metrics.increment("process", 1, tags={"type": action})

    if action in (
        "start_delete_groups",
        "start_merge",
        "start_unmerge",
        "start_unmerge_hierarchical",
        "start_delete_tag",
    ):
        return None

    if action == "end_delete_groups":
        return process_delete_groups(payload, self.__required_columns)

    if action == "end_merge":
        return process_merge(payload, self.__all_columns)

    if action == "end_unmerge":
        return UnmergeGroupsReplacement.parse_message(
            payload, self.__replacement_context
        )

    if action == "end_unmerge_hierarchical":
        return process_unmerge_hierarchical(
            payload, self.__all_columns, self.__state_name
        )

    if action == "end_delete_tag":
        return process_delete_tag(
            payload,
            self.__all_columns,
            self.__tag_column_map,
            self.__promoted_tags,
            self.__use_promoted_prewhere,
            self.__schema,
        )

    if action == "tombstone_events":
        return process_tombstone_events(
            payload, self.__required_columns, self.__state_name
        )

    if action == "replace_group":
        return process_replace_group(
            payload, self.__all_columns, self.__state_name
        )

    if action == "exclude_groups":
        return ExcludeGroupsReplacement.parse_message(
            payload, self.__replacement_context
        )

    raise InvalidMessageType("Invalid message type: {}".format(action))
def process_message(self, message: ReplacementMessage) -> Optional[Replacement]:
    """Translate a replacement message into its replacement, if any.

    Dispatches on ``message.action_type``:

    * every ``start_*`` action returns ``None``;
    * ``end_delete_groups``, ``end_merge``, ``end_unmerge``,
      ``end_delete_tag``, ``tombstone_events``, ``replace_group`` and
      ``exclude_groups`` hand ``message.data`` to the matching
      ``process_*`` helper, whose result is returned unchanged.

    Raises ``InvalidMessageType`` for an unrecognised action type.
    """
    action = message.action_type
    payload = message.data

    if action in (
        "start_delete_groups",
        "start_merge",
        "start_unmerge",
        "start_delete_tag",
    ):
        return None

    if action == "end_delete_groups":
        return process_delete_groups(payload, self.__required_columns)

    if action == "end_merge":
        return process_merge(payload, self.__all_columns)

    if action == "end_unmerge":
        return process_unmerge(payload, self.__all_columns, self.__state_name)

    if action == "end_delete_tag":
        return process_delete_tag(
            payload,
            self.__all_columns,
            self.__tag_column_map,
            self.__promoted_tags,
            self.__use_promoted_prewhere,
            self.__schema,
        )

    if action == "tombstone_events":
        return process_tombstone_events(payload, self.__required_columns)

    if action == "replace_group":
        return process_replace_group(payload, self.__all_columns)

    if action == "exclude_groups":
        return process_exclude_groups(payload)

    raise InvalidMessageType("Invalid message type: {}".format(action))
def process_message(self, message, metadata=None) -> Optional[ProcessedMessage]:
    """Process a raw message into a ``ProcessedMessage``.

    Accepted message shapes:

    * a ``dict``: deprecated unwrapped event, treated as an insert;
    * a list/tuple ``(version, type, data, [state])`` with version 0, 1
      or 2:

      - version 0: ``(0, 'insert', data)``
      - version 1: ``(1, type, data, [state])``; the types
        ``'delete_groups'``, ``'merge'`` and ``'unmerge'`` are ignored
      - version 2: ``(2, type, data, [state])``; ``start_*`` / ``end_*``
        replacement types are passed along raw, paired with the
        stringified ``project_id`` of the event

    The returned ``ProcessedMessage`` carries ``action`` (one of
    ``ProcessorAction.INSERT`` / ``ProcessorAction.REPLACE``) and ``data``
    (a one-element list holding the processed payload).

    Returns ``None`` if ``process_insert`` raises ``EventTooOld`` or
    returns ``None``, or if the message type is one that is deliberately
    ignored.

    Raises ``InvalidMessageType`` for an unsupported type within a known
    version, and ``InvalidMessageVersion`` for any other message shape.
    That includes list/tuple messages with fewer than three elements, which
    previously failed with a bare ``ValueError`` while unpacking.
    """
    if isinstance(message, dict):
        # deprecated unwrapped event message == insert
        action_type = ProcessorAction.INSERT
        try:
            processed = self.process_insert(message, metadata)
        except EventTooOld:
            return None
    elif (
        isinstance(message, (list, tuple))
        # Three elements are required: version, type and data are all
        # unpacked below.
        and len(message) >= 3
        and message[0] in (0, 1, 2)
    ):
        version, type_, event = message[:3]

        if type_ == 'insert':
            action_type = ProcessorAction.INSERT
            try:
                processed = self.process_insert(event, metadata)
            except EventTooOld:
                return None
        elif version == 0:
            # version 0 only ever carried inserts
            raise InvalidMessageType(
                "Invalid message type: {}".format(type_))
        elif version == 1:
            if type_ in ('delete_groups', 'merge', 'unmerge'):
                # these didn't contain the necessary data to handle replacements
                return None
            raise InvalidMessageType(
                "Invalid message type: {}".format(type_))
        else:  # version == 2
            # we temporarily sent these invalid message types from Sentry
            if type_ in ('delete_groups', 'merge'):
                return None

            if type_ not in (
                'start_delete_groups', 'start_merge', 'start_unmerge',
                'start_delete_tag', 'end_delete_groups', 'end_merge',
                'end_unmerge', 'end_delete_tag',
            ):
                raise InvalidMessageType(
                    "Invalid message type: {}".format(type_))

            # pass raw events along to republish
            action_type = ProcessorAction.REPLACE
            processed = (str(event['project_id']), message)
    else:
        raise InvalidMessageVersion("Unknown message format: " + str(message))

    if processed is None:
        return None

    return ProcessedMessage(
        action=action_type,
        data=[processed],
    )