Example 1
def test_synchronized_consumer_worker_crash() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")

    poll_called = Event()

    class BrokenConsumerException(Exception):
        pass

    class BrokenDummyConsumer(DummyConsumer[Commit]):
        def poll(self,
                 timeout: Optional[float] = None) -> Optional[Message[Commit]]:
            try:
                raise BrokenConsumerException()
            finally:
                poll_called.set()

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = BrokenDummyConsumer(
        commit_log_broker, "commit-log-consumer")

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    assert poll_called.wait(1.0) is True

    # If the worker thread has exited without a close request, calling ``poll``
    # should raise an error that originated from the worker thread.

    with pytest.raises(RuntimeError) as e:
        synchronized_consumer.poll(0.0)

    assert type(e.value.__cause__) is BrokenConsumerException

    # If a close request has been sent, the normal runtime error due to the
    # closed consumer should be raised instead.

    synchronized_consumer.close()

    with pytest.raises(RuntimeError) as e:
        synchronized_consumer.poll(0.0)

    assert type(e.value.__cause__) is not BrokenConsumerException
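
The ``Commit`` payload type appears throughout these examples (here only as a type parameter, and constructed directly in the later ones) but is never defined in the listing. Judging from how it is built (group name, partition, offset), a minimal stand-in might look like the sketch below; the field names are inferred from usage and need not match the project's actual definition.

from typing import NamedTuple


class Commit(NamedTuple):
    group: str
    partition: "Partition"  # the same Partition type used in the examples
    offset: int
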
Example 2
def test_synchronized_consumer_handles_end_of_partition() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True)
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer did not handle EOF, it would have crashed
        # here and the next message would never be returned.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
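
Several examples call a ``wait_for_consumer`` helper that is not shown here. The intent appears to be to block until the given consumer has read past the offset of the message just produced; a rough sketch under that assumption (the signature, attempt limit, and attribute names are guesses, not the project's implementation):

def wait_for_consumer(consumer, message, attempts: int = 10) -> None:
    """Block until ``consumer`` has read past the offset of ``message``."""
    for _ in range(attempts):
        # ``tell`` reports the next offset to be read for each assigned partition,
        # so the message has been consumed once the position moves past its offset.
        if consumer.tell().get(message.partition, 0) > message.offset:
            return
        consumer.poll(0.1)
    raise AssertionError(f"consumer did not receive the message within {attempts} polls")
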
Example 3
    def test_batch_size(self) -> None:
        topic = Topic("topic")
        broker: DummyBroker[int] = DummyBroker()
        broker.create_topic(topic, partitions=1)
        producer: DummyProducer[int] = DummyProducer(broker)
        for i in [1, 2, 3]:
            producer.produce(topic, i).result()

        consumer: DummyConsumer[int] = DummyConsumer(broker, "group")

        worker = FakeWorker()
        batching_consumer = BatchingConsumer(
            consumer,
            topic,
            worker=worker,
            max_batch_size=2,
            max_batch_time=100,
            metrics=DummyMetricsBackend(strict=True),
        )

        for _ in range(3):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [1, 2, 3]
        assert worker.flushed == [[1, 2]]
        assert consumer.commit_offsets_calls == 1
        assert consumer.close_calls == 1
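
``FakeWorker``, used in this example and the next, is a test double for the batching worker interface; given the assertions on ``processed`` and ``flushed``, a plausible minimal version is sketched below. The method names (``process_message``, ``flush_batch``) are assumptions about the worker interface rather than a verbatim copy.

class FakeWorker:
    """Records every processed payload and every flushed batch."""

    def __init__(self) -> None:
        self.processed = []
        self.flushed = []

    def process_message(self, message):
        # The return value is what gets accumulated into the current batch.
        self.processed.append(message.payload)
        return message.payload

    def flush_batch(self, batch) -> None:
        self.flushed.append(batch)
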
Example 4
    def test_batch_time(self, mock_time: Any) -> None:
        topic = Topic("topic")
        broker: DummyBroker[int] = DummyBroker()
        broker.create_topic(topic, partitions=1)
        producer: DummyProducer[int] = DummyProducer(broker)

        consumer: DummyConsumer[int] = DummyConsumer(broker, " group")

        worker = FakeWorker()
        batching_consumer = BatchingConsumer(
            consumer,
            topic,
            worker=worker,
            max_batch_size=100,
            max_batch_time=2000,
            metrics=DummyMetricsBackend(strict=True),
        )

        mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 0).timetuple())

        for i in [1, 2, 3]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 1).timetuple())

        for i in [4, 5, 6]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 5).timetuple())

        for i in [7, 8, 9]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [1, 2, 3, 4, 5, 6, 7, 8, 9]
        assert worker.flushed == [[1, 2, 3, 4, 5, 6]]
        assert consumer.commit_offsets_calls == 1
        assert consumer.close_calls == 1
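
The ``mock_time`` parameter in this example is not explained in the listing; it most likely comes from patching ``time.time``, for instance with the standard library's ``unittest.mock.patch``. A hypothetical wiring (the class name is illustrative only):

from typing import Any
from unittest import TestCase
from unittest.mock import patch


class TestBatchTime(TestCase):
    @patch("time.time")  # the patched mock is injected as ``mock_time``
    def test_batch_time(self, mock_time: Any) -> None:
        ...  # body as shown in the example above
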
Example 5
def test_tick_consumer() -> None:
    topic = Topic("messages")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=2)

    producer: DummyProducer[int] = DummyProducer(broker)
    for partition, payloads in enumerate([[0, 1, 2], [0]]):
        for payload in payloads:
            producer.produce(Partition(topic, partition), payload).result()

    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")

    consumer = TickConsumer(inner_consumer)

    consumer.subscribe([topic])

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    # consume 0, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        0,
        Tick(offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 0,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 1,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
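
The ``Tick`` payloads asserted above carry the offset range and timestamp range between two consecutive messages on a partition. A plausible shape for these types, inferred from how they are constructed in this example (the real definitions may differ):

from dataclasses import dataclass
from datetime import datetime
from typing import Generic, TypeVar

T = TypeVar("T")


@dataclass(frozen=True)
class Interval(Generic[T]):
    lower: T
    upper: T


@dataclass(frozen=True)
class Tick:
    offsets: Interval[int]
    timestamps: Interval[datetime]
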
Example 6
    def setUp(self) -> None:
        self.broker: DummyBroker[int] = DummyBroker()
Example 7
def test_synchronized_consumer() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(6)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer")
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-a", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
                consumer.paused,
            [Partition(topic, 0)]), assert_does_not_change(
                consumer.tell, {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-b", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(
                consumer.paused, [Partition(topic, 0)], []), assert_changes(
                    consumer.tell,
                    {Partition(topic, 0): messages[0].offset},
                    {Partition(topic, 0): messages[0].get_next_offset()},
                ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset}):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets

        commit_log_producer.produce(
            commit_log_topic,
            Commit("leader-a", Partition(topic, 0), messages[3].offset),
        ).result()

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-b", Partition(topic, 0), messages[5].offset),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both groups
        # have now committed offsets ahead of the consumer's local position.
        with assert_changes(
                consumer.paused, [Partition(topic, 0)], []), assert_changes(
                    consumer.tell,
                    {Partition(topic, 0): messages[1].offset},
                    {Partition(topic, 0): messages[1].get_next_offset()},
                ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset to emulate
        # messages being skipped.
        with assert_changes(
                consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-a", Partition(topic, 0), messages[5].offset),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(
                consumer.paused, [Partition(topic, 0)], []), assert_changes(
                    consumer.tell,
                    {Partition(topic, 0): messages[4].offset},
                    {Partition(topic, 0): messages[4].get_next_offset()},
                ):
            assert synchronized_consumer.poll(0.0) == messages[4]
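
``assert_changes`` and ``assert_does_not_change``, used in this example and the ones that follow, behave as small context managers over a zero-argument callable. A sketch of the assumed behaviour (the project's actual helpers may differ, e.g. in their error messages):

from contextlib import contextmanager
from typing import Any, Callable, Iterator


@contextmanager
def assert_changes(getter: Callable[[], Any], before: Any, after: Any) -> Iterator[None]:
    # The observed value must equal ``before`` on entry and ``after`` on exit.
    assert getter() == before, f"expected {before!r}, got {getter()!r}"
    yield
    assert getter() == after, f"expected {after!r}, got {getter()!r}"


@contextmanager
def assert_does_not_change(getter: Callable[[], Any], value: Any) -> Iterator[None]:
    # The observed value must equal ``value`` both before and after the block.
    assert getter() == value
    yield
    assert getter() == value
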
Example 8
def test_synchronized_consumer_pause_resume() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer")
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately resume the inner
        # consumer, but from the caller's perspective the partition should
        # appear to be resumed.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
Example 9
def test_tick_consumer_non_monotonic() -> None:
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock(epoch.timestamp())
    broker: DummyBroker[int] = DummyBroker(clock)
    broker.create_topic(topic, partitions=1)

    producer: DummyProducer[int] = DummyProducer(broker)

    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")

    consumer = TickConsumer(inner_consumer)

    consumer.subscribe([topic])

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(inner_consumer.tell, {partition: 0},
                        {partition: 1}), assert_does_not_change(
                            consumer.tell, {partition: 0}):
        assert consumer.poll() is None

    with assert_changes(inner_consumer.tell, {partition: 1},
                        {partition: 2}), assert_changes(
                            consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(inner_consumer.tell, {partition: 2},
                        {partition: 3}), assert_does_not_change(
                            consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    with assert_changes(inner_consumer.tell, {partition: 3},
                        {partition: 4}), assert_changes(
                            consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(epoch + timedelta(seconds=1),
                                    epoch + timedelta(seconds=2)),
            ),
            epoch + timedelta(seconds=2),
        )
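
This example drives time through a ``TestingClock`` handed to the broker. A minimal controllable clock with the interface used here (construction from an epoch, plus ``sleep`` that merely adjusts the stored value; the ``time`` accessor is an assumption about how the broker reads it):

class TestingClock:
    """A clock that only moves when explicitly told to."""

    def __init__(self, epoch: float = 0.0) -> None:
        self.__time = epoch

    def time(self) -> float:
        return self.__time

    def sleep(self, duration: float) -> None:
        # A negative duration moves the clock backwards, which is exactly what
        # this test exploits to produce a non-monotonic timestamp sequence.
        self.__time += duration
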