def process_message(
    self,
    message: Tuple[int, str, InsertEvent, Any],
    metadata: KafkaMessageMetadata,
) -> Optional[ProcessedMessage]:
    """Turn a raw version-2 message into an insert or replacement batch.

    The message is laid out as ``(2, type, data, [state])``.

    Returns:
        An ``InsertBatch`` holding the single processed row for ``"insert"``
        messages, a ``ReplacementBatch`` carrying the untouched raw message
        for any type listed in ``REPLACEMENT_EVENT_TYPES``, or ``None`` when
        the event is too old to be written or ``process_insert`` yields no
        row.

    Raises:
        InvalidMessageVersion: the first element is not ``2``.
        InvalidMessageType: the type is neither ``"insert"`` nor one of the
            replacement types.
    """
    msg_version = message[0]
    if msg_version != 2:
        raise InvalidMessageVersion(f"Unsupported message version: {msg_version}")

    # version 2: (2, type, data, [state])
    kind, payload = message[1:3]

    if kind == "insert":
        try:
            row = self.process_insert(payload, metadata)
        except EventTooOld:
            return None
        # No row means the processor cannot/does not handle this input.
        return None if row is None else InsertBatch([row], None)

    if kind in REPLACEMENT_EVENT_TYPES:
        # Raw events are passed along unchanged so they can be republished.
        return ReplacementBatch(str(payload["project_id"]), [message])

    raise InvalidMessageType(f"Invalid message type: {kind}")
def process_message(self, message: KafkaMessage) -> Optional[Replacement]:
    """Decode a JSON replacement message and dispatch it by type.

    ``message.value`` is parsed as JSON and expected to look like
    ``[2, type, data, ...]``.

    Returns:
        ``None`` for the ``start_*`` message types, otherwise whatever the
        matching ``process_*`` helper returns for the ``end_*`` type.

    Raises:
        InvalidMessageVersion: the first element is not ``2``.
        InvalidMessageType: the type is not one of the known ``start_*`` /
            ``end_*`` types.
    """
    decoded = json.loads(message.value)

    if decoded[0] != 2:
        raise InvalidMessageVersion("Unknown message format: " + str(decoded))

    kind, payload = decoded[1:3]

    # The "start" half of each replacement pair produces no replacement.
    if kind in ('start_delete_groups', 'start_merge', 'start_unmerge', 'start_delete_tag'):
        return None

    if kind == 'end_delete_groups':
        return process_delete_groups(payload, self.__required_columns)
    if kind == 'end_merge':
        return process_merge(payload, self.__all_column_names)
    if kind == 'end_unmerge':
        return process_unmerge(payload, self.__all_column_names)
    if kind == 'end_delete_tag':
        return process_delete_tag(payload, self.dataset)

    raise InvalidMessageType("Invalid message type: {}".format(kind))
def process_message(
    self, message: Message[KafkaPayload]
) -> Optional[Replacement]:
    """Decode a replacement message and hand it to the replacer processor.

    Metadata (partition index, offset, consumer group) is built from the
    incoming message first. If ``_message_already_processed`` reports the
    message as seen, a warning is logged; the message is only dropped when
    the ``skip_seen_offsets`` config flag is truthy.

    Returns:
        ``None`` when a seen message is skipped, otherwise the result of the
        replacer processor's ``process_message``.

    Raises:
        InvalidMessageVersion: the decoded version is not ``2``.
        ValueError: the decoded payload does not unpack into exactly three
            elements (raised by the unpacking assignment itself).
    """
    metadata = ReplacementMessageMetadata(
        partition_index=message.partition.index,
        offset=message.offset,
        consumer_group=self.__consumer_group,
    )

    if self._message_already_processed(metadata):
        log_context = {
            "partition": metadata.partition_index,
            "offset": metadata.offset,
        }
        logger.warning(
            f"Replacer ignored a message, consumer group: {self.__consumer_group}",
            extra=log_context,
        )
        # The warning is emitted either way; skipping is opt-in via config.
        if get_config("skip_seen_offsets", False):
            return None

    seq_message = json.loads(message.payload.value)
    version, action_type, data = seq_message

    if version != 2:
        raise InvalidMessageVersion("Unknown message format: " + str(seq_message))

    replacement_message = ReplacementMessage(
        action_type=action_type,
        data=data,
        metadata=metadata,
    )
    return self.__replacer_processor.process_message(replacement_message)
def process_message(
    self, message: Message[KafkaPayload]
) -> Optional[Replacement]:
    """Decode a JSON replacement message and delegate to the replacer processor.

    The payload value is parsed as JSON; its first element is the version,
    the second and third are forwarded positionally in a
    ``ReplacementMessage``.

    Returns:
        Whatever the replacer processor's ``process_message`` returns.

    Raises:
        InvalidMessageVersion: the first decoded element is not ``2``.
    """
    decoded = json.loads(message.payload.value)

    if decoded[0] != 2:
        raise InvalidMessageVersion("Unknown message format: " + str(decoded))

    return self.__replacer_processor.process_message(
        ReplacementMessage(decoded[1], decoded[2])
    )
def process_message(self, message, metadata=None) -> Optional[ProcessedMessage]:
    """Process a raw message into a ``ProcessedMessage``.

    Accepted shapes:

    * a ``dict``: deprecated unwrapped event, treated as an insert;
    * a list/tuple ``(version, type, data, [state])`` with version 0, 1 or 2.

    Returns:
        A ``ProcessedMessage`` whose ``action`` is ``ProcessorAction.INSERT``
        or ``ProcessorAction.REPLACE`` and whose ``data`` is a one-element
        list. For inserts the element is the result of ``process_insert``;
        for replacements it is ``(str(project_id), raw_message)``.
        ``None`` is returned when the event is too old to be written, when
        ``process_insert`` yields ``None``, or for the legacy message types
        that are deliberately ignored.

    Raises:
        InvalidMessageType: the type is not valid for the message version.
        InvalidMessageVersion: the message is neither a dict nor a
            list/tuple of at least three elements with a supported version.
    """
    action_type = None
    processed = None

    if isinstance(message, dict):
        # deprecated unwrapped event message == insert
        action_type = ProcessorAction.INSERT
        try:
            processed = self.process_insert(message, metadata)
        except EventTooOld:
            return None
    # Three elements are required: message[1:3] is unpacked into exactly two
    # names below, so a shorter sequence must be rejected as an unknown format
    # rather than escaping as a bare unpacking ValueError.
    elif isinstance(message, (list, tuple)) and len(message) >= 3:
        version = message[0]
        if version in (0, 1, 2):
            # version 0: (0, 'insert', data)
            # version 1: (1, type, data, [state])
            #   NOTE: types 'delete_groups', 'merge' and 'unmerge' are ignored
            # version 2: (2, type, data, [state])
            type_, event = message[1:3]

            if type_ == 'insert':
                action_type = ProcessorAction.INSERT
                try:
                    processed = self.process_insert(event, metadata)
                except EventTooOld:
                    return None
            elif version == 0:
                raise InvalidMessageType("Invalid message type: {}".format(type_))
            elif version == 1:
                if type_ in ('delete_groups', 'merge', 'unmerge'):
                    # these didn't contain the necessary data to handle replacements
                    return None
                raise InvalidMessageType("Invalid message type: {}".format(type_))
            elif version == 2:
                # we temporarily sent these invalid message types from Sentry
                if type_ in ('delete_groups', 'merge'):
                    return None

                if type_ in (
                    'start_delete_groups', 'start_merge', 'start_unmerge', 'start_delete_tag',
                    'end_delete_groups', 'end_merge', 'end_unmerge', 'end_delete_tag',
                ):
                    # pass raw events along to republish
                    action_type = ProcessorAction.REPLACE
                    processed = (str(event['project_id']), message)
                else:
                    raise InvalidMessageType("Invalid message type: {}".format(type_))

    if action_type is None:
        raise InvalidMessageVersion("Unknown message format: " + str(message))

    if processed is None:
        return None

    return ProcessedMessage(
        action=action_type,
        data=[processed],
    )