def process_message(
    self,
    message: Tuple[int, str, InsertEvent, Any],
    metadata: KafkaMessageMetadata,
) -> Optional[ProcessedMessage]:
    """\
    Process a raw message into an insertion or replacement batch.
    Returns `None` if the event is too old to be written.
    """
    version = message[0]
    if version != 2:
        raise InvalidMessageVersion(
            f"Unsupported message version: {version}")

    # A version-2 message has the shape (2, type, data, [state]).
    type_ = message[1]
    event = message[2]

    if type_ == "insert":
        try:
            row = self.process_insert(event, metadata)
        except EventTooOld:
            # Too old to be written; drop it.
            return None
        # A None row means the processor cannot/does not handle this input.
        return InsertBatch([row], None) if row is not None else None

    if type_ in REPLACEMENT_EVENT_TYPES:
        # Replacement events are republished verbatim.
        return ReplacementBatch(str(event["project_id"]), [message])

    raise InvalidMessageType(f"Invalid message type: {type_}")
def test_v2_end_delete_tag(self) -> None:
    """A v2 ``end_delete_tag`` message is passed through unchanged as a
    ReplacementBatch keyed by the stringified project id."""
    project_id = 1
    message = (2, "end_delete_tag", {"project_id": project_id})
    # Resolve the processor from the dataset's stream loader rather than
    # constructing it directly, so the wiring under test matches production.
    processor = (enforce_table_writer(
        self.dataset).get_stream_loader().get_processor())
    assert processor.process_message(message, self.metadata) == ReplacementBatch(
        str(project_id), [message])
def test_produce_replacement_messages(self):
    """Flushing replacement batches publishes each one to the dataset's
    replacement topic, keyed by project id with a JSON-encoded value."""
    producer = FakeConfluentKafkaProducer()
    # Resolve the replacement topic from the dataset's stream loader so
    # the worker is wired the same way as in production.
    topic_name = (
        enforce_table_writer(self.dataset)
        .get_stream_loader()
        .get_replacement_topic_spec()
        .topic_name
    )
    worker = ConsumerWorker(
        self.dataset.get_writable_storage(),
        producer=producer,
        replacements_topic=Topic(topic_name),
        metrics=self.metrics,
    )

    worker.flush_batch(
        [
            ReplacementBatch("1", [{"project_id": 1}]),
            ReplacementBatch("2", [{"project_id": 2}]),
        ]
    )

    produced = [(m._topic, m._key, m._value) for m in producer.messages]
    assert produced == [
        ("event-replacements", b"1", b'{"project_id":1}'),
        ("event-replacements", b"2", b'{"project_id":2}'),
    ]
def test_streaming_consumer_strategy() -> None:
    """End-to-end exercise of the streaming consumer strategy: dropped,
    inserted and replaced messages are routed correctly, and flushing on
    close writes insertions and publishes replacements."""
    # Infinite stream of empty-payload Kafka messages with increasing offsets.
    messages = (
        Message(
            Partition(Topic("events"), 0),
            i,
            KafkaPayload(None, b"{}", None),
            datetime.now(),
        )
        for i in itertools.count()
    )

    replacements_producer = FakeConfluentKafkaProducer()

    processor = Mock()
    # One result per submitted message: a drop (None), an insert batch,
    # and a replacement batch — in that order.
    processor.process_message.side_effect = [
        None,
        InsertBatch([{}]),
        ReplacementBatch("key", [{}]),
    ]

    writer = Mock()
    metrics = TestingMetricsBackend()

    factory = StreamingConsumerStrategyFactory(
        None,
        processor,
        writer,
        metrics,
        max_batch_size=10,
        max_batch_time=60,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        replacements_producer=replacements_producer,
        replacements_topic=Topic("replacements"),
    )

    commit_function = Mock()
    strategy = factory.create(commit_function)

    # Feed the three prepared processor results through the strategy.
    for i in range(3):
        strategy.poll()
        strategy.submit(next(messages))

    # Nothing is flushed yet, so no metrics should have been recorded.
    assert metrics.calls == []

    # A plain dict is not a valid ProcessedMessage; submitting it must
    # surface a TypeError from the strategy.
    processor.process_message.side_effect = [{}]

    with pytest.raises(TypeError):
        strategy.poll()
        strategy.submit(next(messages))

    def get_number_of_insertion_metrics() -> int:
        # Count insertion-latency timings recorded on the metrics backend.
        count = 0
        for call in metrics.calls:
            if isinstance(call, Timing) and call.name == "insertions.latency_ms":
                count += 1
        return count

    expected_write_count = 1

    # Closing the strategy flushes: exactly one insertion write (with its
    # latency metric) and exactly one replacement message produced.
    with assert_changes(
        get_number_of_insertion_metrics, 0, expected_write_count
    ), assert_changes(
        lambda: writer.write.call_count, 0, expected_write_count
    ), assert_changes(
        lambda: len(replacements_producer.messages), 0, 1
    ):
        strategy.close()
        strategy.join()
def test_v2_end_delete_tag(self) -> None:
    """A v2 ``end_delete_tag`` message becomes a ReplacementBatch keyed by
    the stringified project id, carrying the raw message."""
    proj = 1
    payload = (2, "end_delete_tag", {"project_id": proj})
    result = self.processor.process_message(payload, self.metadata)
    assert result == ReplacementBatch(str(proj), [payload])
def test_v2_start_unmerge(self) -> None:
    """A v2 ``start_unmerge`` message becomes a ReplacementBatch keyed by
    the stringified project id, carrying the raw message."""
    proj = 1
    payload = (2, "start_unmerge", {"project_id": proj})
    result = self.processor.process_message(payload, self.metadata)
    assert result == ReplacementBatch(str(proj), [payload])
def test_v2_start_delete_groups(self) -> None:
    """A v2 ``start_delete_groups`` message is passed through unchanged as
    a ReplacementBatch keyed by the stringified project id."""
    project_id = 1
    message = (2, "start_delete_groups", {"project_id": project_id})
    assert self.processor.process_message(
        message, self.metadata) == ReplacementBatch(str(project_id), [message])