def test_synchronized_consumer_handles_end_of_partition() -> None:
    """The synchronized consumer's commit log worker must survive an
    end-of-partition notification and keep applying later commits."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    # EOF delivery is enabled so the commit log consumer actually receives an
    # end-of-partition notification after draining each commit.
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True
    )
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
def test_batch_size(self) -> None:
    """Batches should flush as soon as ``max_batch_size`` messages have been
    processed; a trailing partial batch is left unflushed at shutdown."""
    topic = Topic("topic")
    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)

    producer: DummyProducer[int] = DummyProducer(broker)
    for value in (1, 2, 3):
        producer.produce(topic, value).result()

    consumer: DummyConsumer[int] = DummyConsumer(broker, "group")
    worker = FakeWorker()
    batch_consumer = BatchingConsumer(
        consumer,
        topic,
        worker=worker,
        max_batch_size=2,
        max_batch_time=100,
        metrics=DummyMetricsBackend(strict=True),
    )

    for _ in range(3):
        batch_consumer._run_once()
    batch_consumer._shutdown()

    # All three messages were processed, but only the first full batch of two
    # was flushed; the third message remained in an incomplete batch.
    assert worker.processed == [1, 2, 3]
    assert worker.flushed == [[1, 2]]
    assert consumer.commit_offsets_calls == 1
    assert consumer.close_calls == 1
def get_consumer(
    self, group: Optional[str] = None, enable_end_of_partition: bool = True
) -> DummyConsumer[int]:
    """Build a ``DummyConsumer`` on this fixture's broker.

    When ``group`` is omitted, a unique group name is generated so tests do
    not accidentally share consumer group state.
    """
    if group is None:
        group = uuid.uuid1().hex
    return DummyConsumer(
        self.broker, group, enable_end_of_partition=enable_end_of_partition
    )
def test_synchronized_consumer_worker_crash() -> None:
    """An exception raised inside the commit log worker thread must surface
    from ``poll`` on the calling thread, chained as ``__cause__``."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")

    # Set from the worker thread so the test can wait until the worker has
    # actually attempted (and failed) a poll before asserting.
    poll_called = Event()

    class BrokenConsumerException(Exception):
        pass

    class BrokenDummyConsumer(DummyConsumer[Commit]):
        def poll(self, timeout: Optional[float] = None) -> Optional[Message[Commit]]:
            try:
                raise BrokenConsumerException()
            finally:
                poll_called.set()

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = BrokenDummyConsumer(
        commit_log_broker, "commit-log-consumer"
    )

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    assert poll_called.wait(1.0) is True

    # If the worker thread has exited without a close request, calling ``poll``
    # should raise an error that originated from the worker thread.
    with pytest.raises(RuntimeError) as excinfo:
        synchronized_consumer.poll(0.0)
    assert type(excinfo.value.__cause__) is BrokenConsumerException

    # If a close request has been sent, the normal runtime error due to the
    # closed consumer should be raised instead.
    synchronized_consumer.close()
    with pytest.raises(RuntimeError) as excinfo:
        synchronized_consumer.poll(0.0)
    assert type(excinfo.value.__cause__) is not BrokenConsumerException
def test_batch_time(self, mock_time: Any) -> None:
    """Batches should flush once ``max_batch_time`` (ms) has elapsed, even if
    ``max_batch_size`` has not been reached.

    Time is driven manually via the ``mock_time`` patch: messages produced at
    t=0s and t=1s land in one batch (2s window), and the flush is triggered by
    the jump to t=5s; the messages consumed at t=5s start a new, unflushed
    batch.
    """
    topic = Topic("topic")
    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    producer: DummyProducer[int] = DummyProducer(broker)
    # NOTE: fixed a stray leading space in the group name (" group") so this
    # test matches the "group" convention used by the other batching tests.
    consumer: DummyConsumer[int] = DummyConsumer(broker, "group")

    worker = FakeWorker()
    batching_consumer = BatchingConsumer(
        consumer,
        topic,
        worker=worker,
        max_batch_size=100,
        max_batch_time=2000,
        metrics=DummyMetricsBackend(strict=True),
    )

    mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 0).timetuple())
    for i in [1, 2, 3]:
        producer.produce(topic, i).result()
    for _ in range(3):
        batching_consumer._run_once()

    mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 1).timetuple())
    for i in [4, 5, 6]:
        producer.produce(topic, i).result()
    for _ in range(3):
        batching_consumer._run_once()

    # Jumping past the 2000ms window forces the pending batch to flush.
    mock_time.return_value = time.mktime(datetime(2018, 1, 1, 0, 0, 5).timetuple())
    for i in [7, 8, 9]:
        producer.produce(topic, i).result()
    for _ in range(3):
        batching_consumer._run_once()

    batching_consumer._shutdown()

    assert worker.processed == [1, 2, 3, 4, 5, 6, 7, 8, 9]
    assert worker.flushed == [[1, 2, 3, 4, 5, 6]]
    assert consumer.commit_offsets_calls == 1
    assert consumer.close_calls == 1
def test_tick_consumer() -> None:
    """``TickConsumer`` emits a ``Tick`` spanning consecutive offsets per
    partition, advances its own tell() lazily, and rejects seeks to
    nonexistent partitions."""
    topic = Topic("messages")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=2)

    producer: DummyProducer[int] = DummyProducer(broker)
    for partition, payloads in enumerate([[0, 1, 2], [0]]):
        for payload in payloads:
            producer.produce(Partition(topic, partition), payload).result()

    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")
    consumer = TickConsumer(inner_consumer)
    consumer.subscribe([topic])

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    # consume 0, 0: the first message of a partition yields no tick, since a
    # tick requires two consecutive offsets.
    assert consumer.poll() is None
    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        0,
        Tick(offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)),
        epoch,
    )
    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)),
        epoch,
    )
    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 0,
    }

    # consume 1, 0
    assert consumer.poll() is None
    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # consume no message
    assert consumer.poll() is None
    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # Rewind partition 0 and confirm the tick at offset 1 is replayed.
    consumer.seek({Partition(topic, 0): 1})
    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None
    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 1,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)),
        epoch,
    )
    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
def test_synchronized_consumer() -> None:
    """The synchronized consumer only releases messages once every followed
    commit log group has advanced past them, pausing the inner consumer
    whenever the local offset reaches the remote offset fence."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(6)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer"
    )
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[0].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-a", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
            consumer.paused, [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[0].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-b", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(
            consumer.paused, [Partition(topic, 0)], []
        ), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[0].offset},
            {Partition(topic, 0): messages[0].get_next_offset()},
        ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[1].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets
        commit_log_producer.produce(
            commit_log_topic,
            Commit("leader-a", Partition(topic, 0), messages[3].offset),
        ).result()
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-b", Partition(topic, 0), messages[5].offset),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(
            consumer.paused, [Partition(topic, 0)], []
        ), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[1].offset},
            {Partition(topic, 0): messages[1].get_next_offset()},
        ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset, to emulate
        # messages being skipped.
        with assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[4].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader-a", Partition(topic, 0), messages[5].offset),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(
            consumer.paused, [Partition(topic, 0)], []
        ), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[4].offset},
            {Partition(topic, 0): messages[4].get_next_offset()},
        ):
            assert synchronized_consumer.poll(0.0) == messages[4]
def test_synchronized_consumer_pause_resume() -> None:
    """Explicit pause/resume requests from the caller must be tracked
    independently of pauses the synchronized consumer applies internally when
    it is up against the commit log offset fence."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer"
    )
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(
            synchronized_consumer.paused, []
        ), assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
def test_tick_consumer_non_monotonic() -> None:
    """Ticks whose timestamp interval would run backwards are suppressed: a
    message with an earlier timestamp than its predecessor produces no tick,
    and the next tick spans past the out-of-order message."""
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock(epoch.timestamp())
    broker: DummyBroker[int] = DummyBroker(clock)
    broker.create_topic(topic, partitions=1)

    producer: DummyProducer[int] = DummyProducer(broker)
    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")
    consumer = TickConsumer(inner_consumer)
    consumer.subscribe([topic])

    producer.produce(partition, 0)
    clock.sleep(1)
    producer.produce(partition, 1)

    # First message: no tick yet, inner offset advances.
    with assert_changes(
        inner_consumer.tell, {partition: 0}, {partition: 1}
    ), assert_does_not_change(consumer.tell, {partition: 0}):
        assert consumer.poll() is None

    # Second message: tick covering offsets 0..1 over a one second interval.
    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    # Rewind the clock so the next message's timestamp is non-monotonic.
    clock.sleep(-1)
    producer.produce(partition, 2)

    # The out-of-order message is consumed but yields no tick.
    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)
    producer.produce(partition, 3)

    # The next tick skips over the non-monotonic message, spanning offsets 1..3.
    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )