Example 1
    def process_message(
        self,
        message: Tuple[int, str, InsertEvent, Any],
        metadata: KafkaMessageMetadata,
    ) -> Optional[ProcessedMessage]:
        """\
        Turn a raw message tuple into an insertion or replacement batch.

        Returns `None` when the event is too old to be written, or when
        the processor declines to handle the input.
        """
        version = message[0]
        if version != 2:
            raise InvalidMessageVersion(
                f"Unsupported message version: {version}")

        # version 2 payload layout: (2, type, data, [state])
        msg_type = message[1]
        payload = message[2]

        if msg_type == "insert":
            try:
                processed_row = self.process_insert(payload, metadata)
            except EventTooOld:
                return None

            if processed_row is None:
                # the processor cannot/does not handle this input
                return None

            return InsertBatch([processed_row], None)

        if msg_type in REPLACEMENT_EVENT_TYPES:
            # forward the raw event untouched so it can be republished
            return ReplacementBatch(str(payload["project_id"]), [message])

        raise InvalidMessageType(f"Invalid message type: {msg_type}")
Example 2
 def test_v2_end_delete_tag(self):
     # A v2 end_delete_tag event must be republished as a replacement batch.
     pid = 1
     payload = (2, "end_delete_tag", {"project_id": pid})
     stream_loader = enforce_table_writer(self.dataset).get_stream_loader()
     result = stream_loader.get_processor().process_message(payload, self.metadata)
     assert result == ReplacementBatch(str(pid), [payload])
Example 3
    def test_produce_replacement_messages(self):
        """Flushing replacement batches publishes one message per batch row."""
        producer = FakeConfluentKafkaProducer()
        topic_name = (
            enforce_table_writer(self.dataset)
            .get_stream_loader()
            .get_replacement_topic_spec()
            .topic_name
        )
        worker = ConsumerWorker(
            self.dataset.get_writable_storage(),
            producer=producer,
            replacements_topic=Topic(topic_name),
            metrics=self.metrics,
        )

        worker.flush_batch(
            [
                ReplacementBatch("1", [{"project_id": 1}]),
                ReplacementBatch("2", [{"project_id": 2}]),
            ]
        )

        produced = [(m._topic, m._key, m._value) for m in producer.messages]
        assert produced == [
            ("event-replacements", b"1", b'{"project_id":1}'),
            ("event-replacements", b"2", b'{"project_id":2}'),
        ]
Example 4
def test_streaming_consumer_strategy() -> None:
    """End-to-end check of the streaming strategy: skipped messages, inserts
    and replacements are routed correctly, and a bad processor return type
    raises on poll."""
    message_stream = (
        Message(
            Partition(Topic("events"), 0),
            offset,
            KafkaPayload(None, b"{}", None),
            datetime.now(),
        )
        for offset in itertools.count()
    )

    replacements_producer = FakeConfluentKafkaProducer()

    processor = Mock()
    # One message of each kind: dropped, insertable, replacement.
    processor.process_message.side_effect = [
        None,
        InsertBatch([{}]),
        ReplacementBatch("key", [{}]),
    ]

    writer = Mock()
    metrics = TestingMetricsBackend()

    factory = StreamingConsumerStrategyFactory(
        None,
        processor,
        writer,
        metrics,
        max_batch_size=10,
        max_batch_time=60,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        replacements_producer=replacements_producer,
        replacements_topic=Topic("replacements"),
    )

    commit_function = Mock()
    strategy = factory.create(commit_function)

    for _ in range(3):
        strategy.poll()
        strategy.submit(next(message_stream))

    # Nothing is flushed yet, so no metrics should have been recorded.
    assert metrics.calls == []

    # A plain dict is not a valid processor result and must raise.
    processor.process_message.side_effect = [{}]

    with pytest.raises(TypeError):
        strategy.poll()
        strategy.submit(next(message_stream))

    def count_insertion_latency_timings() -> int:
        # Number of insertion-latency timing metrics recorded so far.
        return sum(
            1
            for call in metrics.calls
            if isinstance(call, Timing) and call.name == "insertions.latency_ms"
        )

    expected_write_count = 1

    with assert_changes(
        count_insertion_latency_timings, 0, expected_write_count
    ), assert_changes(
        lambda: writer.write.call_count, 0, expected_write_count
    ), assert_changes(
        lambda: len(replacements_producer.messages), 0, 1
    ):
        strategy.close()
        strategy.join()
Example 5
 def test_v2_end_delete_tag(self) -> None:
     # v2 end_delete_tag messages are passed through as replacements.
     pid = 1
     payload = (2, "end_delete_tag", {"project_id": pid})
     expected = ReplacementBatch(str(pid), [payload])
     assert self.processor.process_message(payload, self.metadata) == expected
Example 6
 def test_v2_start_unmerge(self) -> None:
     # v2 start_unmerge messages are passed through as replacements.
     pid = 1
     payload = (2, "start_unmerge", {"project_id": pid})
     expected = ReplacementBatch(str(pid), [payload])
     assert self.processor.process_message(payload, self.metadata) == expected
Example 7
 def test_v2_start_delete_groups(self):
     # v2 start_delete_groups messages are passed through as replacements.
     pid = 1
     payload = (2, "start_delete_groups", {"project_id": pid})
     expected = ReplacementBatch(str(pid), [payload])
     assert self.processor.process_message(payload, self.metadata) == expected