def test_parallel_transform_step_terminate_workers() -> None:
    """Terminating a ``ParallelTransformStep`` must reap its subprocess pool
    (workers plus the pool manager) and terminate the downstream step once."""
    next_step = Mock()

    starting_processes = get_subprocess_count()
    worker_processes = 2
    manager_processes = 1

    # Constructing the step forks the workers and the pool manager process.
    with assert_changes(
        get_subprocess_count,
        starting_processes,
        starting_processes + worker_processes + manager_processes,
    ):
        transform_step = ParallelTransformStep(
            transform_payload_expand,  # doesn't matter
            next_step,
            processes=worker_processes,
            max_batch_size=5,
            max_batch_time=60,
            input_block_size=4096,
            output_block_size=4096,
            metrics=TestingMetricsBackend(),
        )

    # ``terminate`` must return the process count to its starting value and
    # propagate the terminate call downstream exactly once.
    with assert_changes(
        get_subprocess_count,
        starting_processes + worker_processes + manager_processes,
        starting_processes,
    ), assert_changes(lambda: next_step.terminate.call_count, 0, 1):
        transform_step.terminate()
def test_stream_processor_termination_on_error() -> None:
    """An exception raised by the strategy should terminate the strategy,
    close the consumer, and surface from ``StreamProcessor.run``."""
    topic = Topic("test")

    consumer = mock.Mock()
    consumer.poll.return_value = Message(Partition(topic, 0), 0, 0, datetime.now())

    exception = NotImplementedError("error")

    strategy = mock.Mock()
    strategy.submit.side_effect = exception

    factory = mock.Mock()
    factory.create.return_value = strategy

    processor: StreamProcessor[int] = StreamProcessor(
        consumer, topic, factory, TestingMetricsBackend()
    )

    # Fire the on-assign callback manually so the processor builds a strategy.
    assignment_callback = consumer.subscribe.call_args.kwargs["on_assign"]
    assignment_callback({Partition(topic, 0): 0})

    with pytest.raises(Exception) as e, assert_changes(
        lambda: strategy.terminate.call_count, 0, 1
    ), assert_changes(lambda: consumer.close.call_count, 0, 1):
        processor.run()

    assert e.value == exception
def test_transform() -> None:
    """``TransformStep`` applies its function to each payload and forwards
    ``submit``/``poll``/``close``/``join`` to the next step."""
    next_step = Mock()

    def transform_function(message: Message[int]) -> int:
        return message.payload * 2

    transform_step = TransformStep(transform_function, next_step)

    original_message = Message(Partition(Topic("topic"), 0), 0, 1, datetime.now())

    with assert_changes(lambda: next_step.submit.call_count, 0, 1):
        transform_step.submit(original_message)

    # The forwarded message keeps partition/offset/timestamp; only the
    # payload is replaced with the transformed value.
    assert next_step.submit.call_args == call(
        Message(
            original_message.partition,
            original_message.offset,
            transform_function(original_message),
            original_message.timestamp,
        )
    )

    with assert_changes(lambda: next_step.poll.call_count, 0, 1):
        transform_step.poll()

    # Joining the step closes and joins the downstream step.
    with assert_changes(lambda: next_step.close.call_count, 0, 1), assert_changes(
        lambda: next_step.join.call_count, 0, 1
    ):
        transform_step.join()
def test_filter() -> None:
    """``FilterStep`` forwards only messages whose payload passes the test
    function, and forwards ``poll``/``close``/``join`` downstream."""
    next_step = Mock()

    def test_function(message: Message[bool]) -> bool:
        return message.payload

    filter_step = FilterStep(test_function, next_step)

    # A falsy payload must be dropped, not forwarded.
    fail_message = Message(Partition(Topic("topic"), 0), 0, False, datetime.now())

    with assert_does_not_change(lambda: next_step.submit.call_count, 0):
        filter_step.submit(fail_message)

    # A truthy payload is forwarded unchanged.
    pass_message = Message(Partition(Topic("topic"), 0), 0, True, datetime.now())

    with assert_changes(lambda: next_step.submit.call_count, 0, 1):
        filter_step.submit(pass_message)

    assert next_step.submit.call_args == call(pass_message)

    with assert_changes(lambda: next_step.poll.call_count, 0, 1):
        filter_step.poll()

    with assert_changes(lambda: next_step.close.call_count, 0, 1), assert_changes(
        lambda: next_step.join.call_count, 0, 1
    ):
        filter_step.join()
def test_multistorage_strategy(
    processes: Optional[int],
    input_block_size: Optional[int],
    output_block_size: Optional[int],
) -> None:
    """End-to-end check of the multistorage strategy: messages are routed by
    their ``table`` header to the matching storage, unknown tables are
    ignored, and the high-watermark offset is committed on join."""
    from snuba.datasets.storages import groupassignees, groupedmessages
    from tests.datasets.cdc.test_groupassignee import TestGroupassignee
    from tests.datasets.cdc.test_groupedmessage import TestGroupedMessage

    commit = Mock()

    storages = [groupassignees.storage, groupedmessages.storage]

    strategy = MultistorageConsumerProcessingStrategyFactory(
        storages,
        10,
        10,
        processes,
        input_block_size,
        output_block_size,
        TestingMetricsBackend(),
    ).create(commit)

    payloads = [
        # Unknown table header: should not be written anywhere.
        KafkaPayload(None, b"{}", [("table", b"ignored")]),
        KafkaPayload(
            None,
            json.dumps(TestGroupassignee.INSERT_MSG).encode("utf8"),
            [("table", groupassignees.storage.get_postgres_table().encode("utf8"))],
        ),
        KafkaPayload(
            None,
            json.dumps(TestGroupedMessage.INSERT_MSG).encode("utf8"),
            [("table", groupedmessages.storage.get_postgres_table().encode("utf8"))],
        ),
    ]

    messages = [
        Message(
            Partition(Topic("topic"), 0), offset, payload, datetime.now(), offset + 1
        )
        for offset, payload in enumerate(payloads)
    ]

    # Each insert message should land exactly one row in its storage.
    with assert_changes(
        lambda: get_row_count(groupassignees.storage), 0, 1
    ), assert_changes(lambda: get_row_count(groupedmessages.storage), 0, 1):
        for message in messages:
            strategy.submit(message)

        # Closing and joining flushes the batch and commits offset 3 (one
        # past the last consumed message).
        with assert_changes(
            lambda: commit.call_args_list,
            [],
            [call({Partition(Topic("topic"), 0): 3})],
        ):
            strategy.close()
            strategy.join()
def test_get_readthrough(backend: Cache[bytes]) -> None:
    """``get_readthrough`` should call the value function once on a cache
    miss, store the result, and serve later reads from the cache.

    BUG FIX: the original wrote ``backend.get_readthrough(...) == value`` as a
    bare expression statement, so the equality result was silently discarded
    and the return value was never actually checked. Both comparisons are now
    real assertions.
    """
    key = "key"
    value = b"value"
    function = mock.MagicMock(return_value=value)

    assert backend.get(key) is None

    # Cache miss: the value function runs once and its result is returned.
    with assert_changes(lambda: function.call_count, 0, 1):
        assert backend.get_readthrough(key, function, noop, 5) == value

    assert backend.get(key) == value

    # Cache hit: the value function must not be invoked again.
    with assert_does_not_change(lambda: function.call_count, 1):
        assert backend.get_readthrough(key, function, noop, 5) == value
def test_collect() -> None:
    """``CollectStep`` batches messages: the inner step is created lazily on
    the first message, flushed (close/join/commit) once the batch size is
    reached, and recreated for the next batch."""
    step_factory = Mock()
    step_factory.return_value = inner_step = Mock()

    commit_function = Mock()
    partition = Partition(Topic("topic"), 0)
    messages = message_generator(partition, 0)

    collect_step = CollectStep(step_factory, commit_function, 2, 60)

    # A batch should be started the first time the step receives a message.
    with assert_changes(lambda: step_factory.call_count, 0, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 0

    # Subsequent messages should reuse the existing batch, ...
    with assert_does_not_change(lambda: step_factory.call_count, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 1

    # ...until we hit the batch size limit.
    with assert_changes(lambda: inner_step.close.call_count, 0, 1), assert_changes(
        lambda: inner_step.join.call_count, 0, 1
    ), assert_changes(lambda: commit_function.call_count, 0, 1):
        collect_step.poll()
        assert commit_function.call_args == call({partition: 2})

    step_factory.return_value = inner_step = Mock()

    # The next message should create a new batch.
    with assert_changes(lambda: step_factory.call_count, 1, 2):
        collect_step.submit(next(messages))

    with assert_changes(lambda: inner_step.close.call_count, 0, 1):
        collect_step.close()

    # Joining flushes the in-flight batch and commits a second time.
    with assert_changes(lambda: inner_step.join.call_count, 0, 1), assert_changes(
        lambda: commit_function.call_count, 1, 2
    ):
        collect_step.join()
def test_message_batch() -> None:
    """``MessageBatch`` accepts payloads while its shared-memory block has
    room and raises ``ValueTooLarge`` once an append would overflow it."""
    partition = Partition(Topic("test"), 0)

    with SharedMemoryManager() as smm:
        block = smm.SharedMemory(4096)
        assert block.size == 4096

        # A 4000-byte payload fits once — but not twice — in the 4096-byte block.
        message = Message(
            partition, 0, KafkaPayload(None, b"\x00" * 4000, None), datetime.now()
        )

        batch: MessageBatch[KafkaPayload] = MessageBatch(block)

        with assert_changes(lambda: len(batch), 0, 1):
            batch.append(message)

        assert batch[0] == message
        assert list(batch) == [message]

        # The second append must fail without altering the batch length.
        with assert_does_not_change(lambda: len(batch), 1), pytest.raises(
            ValueTooLarge
        ):
            batch.append(message)
def test_streaming_consumer_strategy() -> None:
    """Exercise the streaming strategy factory: processor results drive
    writer calls and replacement production, and an unrecognized result type
    raises ``TypeError``."""
    messages = (
        Message(
            Partition(Topic("events"), 0),
            i,
            KafkaPayload(None, b"{}", None),
            datetime.now(),
        )
        for i in itertools.count()
    )

    replacements_producer = FakeConfluentKafkaProducer()

    processor = Mock()
    processor.process_message.side_effect = [
        None,
        InsertBatch([{}]),
        ReplacementBatch("key", [{}]),
    ]

    writer = Mock()

    metrics = TestingMetricsBackend()

    factory = StreamingConsumerStrategyFactory(
        None,
        processor,
        writer,
        metrics,
        max_batch_size=10,
        max_batch_time=60,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        replacements_producer=replacements_producer,
        replacements_topic=Topic("replacements"),
    )

    commit_function = Mock()
    strategy = factory.create(commit_function)

    for i in range(3):
        strategy.poll()
        strategy.submit(next(messages))

    assert metrics.calls == []

    # A processor result that is neither an insert nor a replacement batch
    # must be rejected with a TypeError.
    processor.process_message.side_effect = [{}]

    with pytest.raises(TypeError):
        strategy.poll()
        strategy.submit(next(messages))

    def get_number_of_insertion_metrics() -> int:
        count = 0
        for call in metrics.calls:
            if isinstance(call, Timing) and call.name == "insertions.latency_ms":
                count += 1
        return count

    expected_write_count = 1

    # Closing flushes pending work: one batch write, one replacement message,
    # and one insertion-latency timing.
    with assert_changes(
        get_number_of_insertion_metrics, 0, expected_write_count
    ), assert_changes(
        lambda: writer.write.call_count, 0, expected_write_count
    ), assert_changes(lambda: len(replacements_producer.messages), 0, 1):
        strategy.close()
        strategy.join()
def test_tick_consumer(time_shift: Optional[timedelta]) -> None:
    """``CommitLogTickConsumer`` should emit a ``Tick`` for each consecutive
    pair of commits from the followed group, apply the configured time shift,
    and support ``tell``/``seek`` over the underlying commit-log offsets."""
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    # Publish commits for two logical partitions into the single commit-log
    # partition: offsets 0, 1, 2 for partition 0 and offset 0 for partition 1.
    for partition, offsets in enumerate([[0, 1, 2], [0]]):
        for offset in offsets:
            payload = commit_codec.encode(
                Commit(
                    followed_consumer_group, Partition(topic, partition), offset, epoch
                )
            )
            producer.produce(Partition(topic, 0), payload).result()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(
        inner_consumer, followed_consumer_group, time_shift=time_shift
    )

    if time_shift is None:
        time_shift = timedelta()

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert consumer.tell() == {
            Partition(topic, 0): 0,
        }

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(0, offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)).time_shift(
            time_shift
        ),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(0, offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)).time_shift(
            time_shift
        ),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 3,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 4,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 4,
    }

    # Seeking back resets the tick state; the re-consumed message yields no
    # tick until a consecutive pair is seen again.
    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(0, offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)).time_shift(
            time_shift
        ),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 3,
    }

    # Seeking an unassigned partition must error.
    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
def test_tick_consumer_non_monotonic() -> None:
    """Commits whose timestamps move backwards must not produce a tick; the
    consumer should silently skip them and resume ticking once timestamps
    advance again."""
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(
        partition,
        commit_codec.encode(Commit(followed_consumer_group, partition, 0, epoch)),
    ).result()

    clock.sleep(1)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 1, epoch + timedelta(seconds=1))
        ),
    ).result()

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert consumer.tell() == {partition: 1}

    # The second commit completes an interval and emits a tick.
    with assert_changes(consumer.tell, {partition: 1}, {partition: 2}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                0,
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    # A commit timestamped *before* the previous one advances the position
    # but must not emit a tick.
    clock.sleep(-1)

    producer.produce(
        partition,
        commit_codec.encode(Commit(followed_consumer_group, partition, 2, epoch)),
    ).result()

    with assert_changes(consumer.tell, {partition: 2}, {partition: 3}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 3, epoch + timedelta(seconds=2))
        ),
    ).result()

    # Ticking resumes, spanning the skipped commit (offsets 1..3).
    with assert_changes(consumer.tell, {partition: 3}, {partition: 4}):
        assert consumer.poll() == Message(
            partition,
            3,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )
def test_working_offsets(self) -> None:
    """Verify ``tell``/``seek``/``poll`` offset bookkeeping around the end of
    a partition, including error cases for invalid seeks.

    BUG FIX: the original wrote ``consumer.poll(1.0) is None`` inside the
    ``pytest.raises(EndOfPartition)`` block — the ``is None`` comparison was a
    discarded expression that could never be checked (poll raises first) and
    was misleading; it is removed. The garbled duplicated comment above that
    block is also repaired.
    """
    payloads = self.get_payloads()

    with self.get_topic() as topic:
        with closing(self.get_producer()) as producer:
            messages = [producer.produce(topic, next(payloads)).result(5.0)]

        def on_assign(partitions: Mapping[Partition, int]) -> None:
            # NOTE: This will eventually need to be controlled by a generalized
            # consumer auto offset reset setting.
            assert (
                partitions
                == consumer.tell()
                == {messages[0].partition: messages[0].offset}
            )

        consumer = self.get_consumer()
        consumer.subscribe([topic], on_assign=on_assign)

        # Poll with retries until the assignment has been received.
        for i in range(5):
            message = consumer.poll(1.0)
            if message is not None:
                break
            else:
                time.sleep(1.0)
        else:
            raise Exception("assignment never received")

        assert message == messages[0]

        # The first call to ``poll`` past the end of the partition should
        # raise ``EndOfPartition`` without advancing the position.
        with assert_does_not_change(
            consumer.tell, {message.partition: message.next_offset}
        ), pytest.raises(EndOfPartition):
            consumer.poll(1.0)

        # It should otherwise be safe to try to read the first missing
        # offset (index) in the partition.
        with assert_does_not_change(
            consumer.tell, {message.partition: message.next_offset}
        ):
            assert consumer.poll(1.0) is None

        with assert_changes(
            consumer.tell,
            {message.partition: message.next_offset},
            {message.partition: message.offset},
        ):
            consumer.seek({message.partition: message.offset})

        with assert_changes(
            consumer.tell,
            {message.partition: message.offset},
            {message.partition: message.next_offset},
        ):
            assert consumer.poll(1.0) == messages[0]

        # Seeking beyond the first missing index should work but subsequent
        # reads should error. (We don't know if this offset is valid or not
        # until we try to fetch a message.)
        with assert_changes(
            consumer.tell,
            {message.partition: message.next_offset},
            {message.partition: message.next_offset + 1},
        ):
            consumer.seek({message.partition: message.next_offset + 1})

        # Offsets should not be advanced after a failed poll.
        with assert_does_not_change(
            consumer.tell, {message.partition: message.next_offset + 1}
        ), pytest.raises(ConsumerError):
            consumer.poll(1.0)

        # Trying to seek on an unassigned partition should error.
        with assert_does_not_change(
            consumer.tell, {message.partition: message.next_offset + 1}
        ), pytest.raises(ConsumerError):
            consumer.seek({message.partition: 0, Partition(topic, -1): 0})

        # Trying to seek to a negative offset should error.
        with assert_does_not_change(
            consumer.tell, {message.partition: message.next_offset + 1}
        ), pytest.raises(ConsumerError):
            consumer.seek({message.partition: -1})
def test_synchronized_consumer_pause_resume() -> None:
    """Explicit pause/resume on a ``SynchronizedConsumer`` must be tracked
    independently of the internal pausing done at the commit-log offset
    fence: the caller-visible paused set and the inner consumer's paused set
    change at different times."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer"
    )
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0), messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused, []), assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
def test_pause_resume(self) -> None:
    """Pausing a partition suspends delivery without losing position;
    ``seek`` takes effect across pause/resume; pausing or resuming an
    unassigned partition errors without changing the paused set."""
    payloads = self.get_payloads()

    with self.get_topic() as topic, closing(
        self.get_consumer()
    ) as consumer, closing(self.get_producer()) as producer:
        messages = [
            producer.produce(topic, next(payloads)).result(timeout=5.0)
            for i in range(5)
        ]

        consumer.subscribe([topic])
        assert consumer.poll(10.0) == messages[0]
        assert consumer.paused() == []

        # XXX: Unfortunately, there is really no way to prove that this
        # consumer would return the message other than by waiting a while.
        with assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            consumer.pause([Partition(topic, 0)])

        assert consumer.poll(1.0) is None

        # We should pick up where we left off when we resume the partition.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            consumer.resume([Partition(topic, 0)])

        assert consumer.poll(5.0) == messages[1]

        # Calling ``seek`` should have a side effect, even if no messages
        # are consumed before calling ``pause``.
        with assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[1].next_offset},
            {Partition(topic, 0): messages[3].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[3].offset})
            consumer.pause([Partition(topic, 0)])

        assert consumer.poll(1.0) is None

        consumer.resume([Partition(topic, 0)])
        assert consumer.poll(5.0) == messages[3]

        # It is still allowable to call ``seek`` on a paused partition.
        # When consumption resumes, we would expect to see the side effect
        # of that seek.
        consumer.pause([Partition(topic, 0)])

        with assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[3].next_offset},
            {Partition(topic, 0): messages[0].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[0].offset})

        assert consumer.poll(1.0) is None

        consumer.resume([Partition(topic, 0)])
        assert consumer.poll(5.0) == messages[0]

        # Pausing a mix of assigned and unassigned partitions must error
        # without pausing anything.
        with assert_does_not_change(consumer.paused, []), pytest.raises(
            ConsumerError
        ):
            consumer.pause([Partition(topic, 0), Partition(topic, 1)])

        with assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            consumer.pause([Partition(topic, 0)])

        # Likewise, resuming an unassigned partition must error and leave the
        # paused set untouched.
        with assert_does_not_change(
            consumer.paused, [Partition(topic, 0)]
        ), pytest.raises(ConsumerError):
            consumer.resume([Partition(topic, 0), Partition(topic, 1)])
def test_consumer(self) -> None:
    """Full lifecycle check of the consumer: assignment and revocation
    callbacks, tell/seek, pause/resume, offset staging and commit,
    unsubscribe, closed-state errors, and end-of-partition handling."""
    group = uuid.uuid1().hex

    payloads = self.get_payloads()

    with self.get_topic() as topic:
        with closing(self.get_producer()) as producer:
            messages = [
                future.result(timeout=5.0)
                for future in [
                    producer.produce(topic, next(payloads)) for i in range(2)
                ]
            ]

        consumer = self.get_consumer(group)

        def assignment_callback(partitions: Mapping[Partition, int]) -> None:
            assignment_callback.called = True
            assert partitions == {Partition(topic, 0): messages[0].offset}

            consumer.seek({Partition(topic, 0): messages[1].offset})

            with pytest.raises(ConsumerError):
                consumer.seek({Partition(topic, 1): 0})

        assignment_callback.called = False

        def revocation_callback(partitions: Sequence[Partition]) -> None:
            revocation_callback.called = True
            assert partitions == [Partition(topic, 0)]
            assert consumer.tell() == {Partition(topic, 0): messages[1].offset}

            # Not sure why you'd want to do this, but it shouldn't error.
            consumer.seek({Partition(topic, 0): messages[0].offset})

        revocation_callback.called = False

        # TODO: It'd be much nicer if ``subscribe`` returned a future that we could
        # use to wait for assignment, but we'd need to be very careful to avoid
        # edge cases here. It's probably not worth the complexity for now.
        consumer.subscribe(
            [topic], on_assign=assignment_callback, on_revoke=revocation_callback
        )

        with assert_changes(
            lambda: assignment_callback.called, False, True
        ), assert_changes(
            consumer.tell, {}, {Partition(topic, 0): messages[1].next_offset}
        ):
            message = consumer.poll(10.0)  # XXX: getting the subscription is slow

        assert isinstance(message, Message)
        assert message.partition == Partition(topic, 0)
        assert message.offset == messages[1].offset
        assert message.payload == messages[1].payload

        consumer.seek({Partition(topic, 0): messages[0].offset})
        assert consumer.tell() == {Partition(topic, 0): messages[0].offset}

        with pytest.raises(ConsumerError):
            consumer.seek({Partition(topic, 1): 0})

        with assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            consumer.pause([Partition(topic, 0)])

        # Even if there is another message available, ``poll`` should
        # return ``None`` if the consumer is paused.
        assert consumer.poll(1.0) is None

        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            consumer.resume([Partition(topic, 0)])

        message = consumer.poll(1.0)
        assert isinstance(message, Message)
        assert message.partition == Partition(topic, 0)
        assert message.offset == messages[0].offset
        assert message.payload == messages[0].payload

        assert consumer.commit_offsets() == {}

        consumer.stage_offsets({message.partition: message.next_offset})

        with pytest.raises(ConsumerError):
            consumer.stage_offsets({Partition(Topic("invalid"), 0): 0})

        assert consumer.commit_offsets() == {
            Partition(topic, 0): message.next_offset
        }

        assert consumer.tell() == {Partition(topic, 0): messages[1].offset}

        consumer.unsubscribe()

        with assert_changes(lambda: revocation_callback.called, False, True):
            assert consumer.poll(1.0) is None

        assert consumer.tell() == {}

        with pytest.raises(ConsumerError):
            consumer.seek({Partition(topic, 0): messages[0].offset})

        revocation_callback.called = False

        with assert_changes(
            lambda: consumer.closed, False, True
        ), assert_does_not_change(lambda: revocation_callback.called, False):
            consumer.close()

        # Make sure all public methods (except ``close``) error if called
        # after the consumer has been closed.

        with pytest.raises(RuntimeError):
            consumer.subscribe([topic])

        with pytest.raises(RuntimeError):
            consumer.unsubscribe()

        with pytest.raises(RuntimeError):
            consumer.poll()

        with pytest.raises(RuntimeError):
            consumer.tell()

        with pytest.raises(RuntimeError):
            consumer.seek({Partition(topic, 0): messages[0].offset})

        with pytest.raises(RuntimeError):
            consumer.pause([Partition(topic, 0)])

        with pytest.raises(RuntimeError):
            consumer.resume([Partition(topic, 0)])

        with pytest.raises(RuntimeError):
            consumer.paused()

        with pytest.raises(RuntimeError):
            consumer.stage_offsets({})

        with pytest.raises(RuntimeError):
            consumer.commit_offsets()

        consumer.close()  # should be safe, even if the consumer is already closed

        consumer = self.get_consumer(group)

        revocation_callback = mock.MagicMock()
        consumer.subscribe([topic], on_revoke=revocation_callback)

        message = consumer.poll(10.0)  # XXX: getting the subscription is slow
        assert isinstance(message, Message)
        assert message.partition == Partition(topic, 0)
        assert message.offset == messages[1].offset
        assert message.payload == messages[1].payload

        try:
            assert consumer.poll(1.0) is None
        except EndOfPartition as error:
            assert error.partition == Partition(topic, 0)
            assert error.offset == message.next_offset
        else:
            raise AssertionError("expected EndOfPartition error")

        with assert_changes(lambda: revocation_callback.called, False, True):
            consumer.close()
def test_tick_consumer_non_monotonic(clock: Clock, broker: Broker[int]) -> None:
    """``TickConsumer`` must skip messages whose timestamps move backwards:
    the inner consumer's position advances but no tick is emitted until
    timestamps become monotonic again."""
    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer)

    def assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assignment_callback.called = True
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback.called = False

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert inner_consumer.tell() == {partition: 1}
    assert consumer.tell() == {partition: 0}

    # The second message completes an interval and yields a tick.
    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    # Move the clock backwards: the resulting message must be skipped.
    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    # Ticking resumes, with the interval spanning the skipped offset (1..3).
    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )
def test_synchronized_consumer_pause_resume(broker: Broker[KafkaPayload]) -> None:
    """Same pause/resume contract as the dummy-broker variant, exercised
    against the broker fixture: caller-visible pauses are tracked separately
    from the inner consumer's offset-fence pauses."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"), [])).result(
            1.0
        )
        for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):

        def assignment_callback(offsets: Mapping[Partition, int]) -> None:
            synchronized_consumer.pause([Partition(topic, 0)])

        synchronized_consumer.subscribe([topic], on_assign=assignment_callback)

        # The pause requested during assignment takes effect on first poll.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_changes(consumer.paused, [], [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0.0) is None

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0), messages[0].next_offset)
                ),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused, []), assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(
            synchronized_consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(
            synchronized_consumer.paused, [Partition(topic, 0)], []
        ), assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
def test_tick_consumer(
    clock: Clock, broker: Broker[int], time_shift: Optional[timedelta]
) -> None:
    """``TickConsumer`` over two partitions: ticks are emitted per consecutive
    message pair within a partition, the configured time shift is applied,
    and ``seek`` rewinds the logical position independently of the inner
    consumer's raw offsets."""
    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")

    broker.create_topic(topic, partitions=2)

    producer = broker.get_producer()

    # Partition 0 receives three messages, partition 1 receives one.
    for partition, payloads in enumerate([[0, 1, 2], [0]]):
        for payload in payloads:
            producer.produce(Partition(topic, partition), payload).result()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer, time_shift=time_shift)

    if time_shift is None:
        time_shift = timedelta()

    def assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assignment_callback.called = True
        assert consumer.tell() == {
            Partition(topic, 0): 0,
            Partition(topic, 1): 0,
        }
        assert inner_consumer.tell() == {
            Partition(topic, 0): 0,
            Partition(topic, 1): 0,
        }

    assignment_callback.called = False

    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        0,
        Tick(offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)).time_shift(
            time_shift
        ),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)).time_shift(
            time_shift
        ),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 0,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # Rewind the logical position for partition 0; the inner consumer is
    # moved accordingly while partition 1 is untouched.
    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 1,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)).time_shift(
            time_shift
        ),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }
    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # Seeking an unassigned partition must error.
    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
def test_synchronized_consumer(broker: Broker[KafkaPayload]) -> None:
    """The synchronized consumer only advances once every followed group commits."""
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    # Publish six messages up front so the test can advance through them.
    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"), [])).result(
            1.0
        )
        for i in range(6)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        with assert_changes(consumer.paused, [], [Partition(topic, 0)]), assert_changes(
            consumer.tell, {}, {Partition(topic, 0): messages[0].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0), messages[0].next_offset)
                ),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
            consumer.paused, [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[0].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0), messages[0].next_offset)
                ),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[0].offset},
            {Partition(topic, 0): messages[0].next_offset},
        ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[1].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets
        producer.produce(
            commit_log_topic,
            commit_codec.encode(
                Commit("leader-a", Partition(topic, 0), messages[3].offset)
            ),
        ).result()

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0), messages[5].offset)
                ),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[1].offset},
            {Partition(topic, 0): messages[1].next_offset},
        ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset, to emulate
        # messages being skipped.
        with assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(
            consumer.paused, [], [Partition(topic, 0)]
        ), assert_does_not_change(
            consumer.tell, {Partition(topic, 0): messages[4].offset}
        ):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0), messages[5].offset)
                ),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []), assert_changes(
            consumer.tell,
            {Partition(topic, 0): messages[4].offset},
            {Partition(topic, 0): messages[4].next_offset},
        ):
            assert synchronized_consumer.poll(0.0) == messages[4]
def test_parallel_transform_step() -> None:
    """Messages submitted to the parallel step reach the next step, and the
    batches-in-progress gauge tracks batch lifecycle through close/join."""
    next_step = Mock()

    messages = [
        Message(
            Partition(Topic("test"), 0),
            i,
            KafkaPayload(None, b"\x00" * size, None),
            datetime.now(),
        )
        for i, size in enumerate([1000, 1000, 2000, 2000])
    ]

    starting_processes = get_subprocess_count()
    worker_processes = 2
    manager_processes = 1

    metrics = TestingMetricsBackend()

    # Starting the step forks the workers and the pool manager; submitting
    # the payloads fills two batches (gauge: 0 -> 1 -> 2).
    with assert_changes(
        get_subprocess_count,
        starting_processes,
        starting_processes + worker_processes + manager_processes,
    ), assert_changes(
        lambda: metrics.calls,
        [],
        [
            GaugeCall("batches_in_progress", value, tags=None)
            for value in [0.0, 1.0, 2.0]
        ],
    ):
        transform_step = ParallelTransformStep(
            transform_payload_expand,
            next_step,
            processes=worker_processes,
            max_batch_size=5,
            max_batch_time=60,
            input_block_size=4096,
            output_block_size=4096,
            metrics=metrics,
        )

        for message in messages:
            transform_step.poll()
            transform_step.submit(message)

        transform_step.close()

    # Reset recorded metrics so the join-phase gauge values can be asserted
    # in isolation.
    metrics.calls.clear()

    # Joining drains the remaining batches (gauge: 1 -> 0) and reaps all
    # subprocesses, returning to the baseline count.
    with assert_changes(
        get_subprocess_count,
        starting_processes + worker_processes + manager_processes,
        starting_processes,
    ), assert_changes(
        lambda: metrics.calls,
        [],
        [
            GaugeCall("batches_in_progress", value, tags=None)
            for value in [1.0, 0.0]
        ],
    ):
        transform_step.join()

    # Every submitted message must have been forwarded downstream.
    assert next_step.submit.call_count == len(messages)
def test_stream_processor_lifecycle() -> None:
    """Exercise the full assign/poll/pause/resume/revoke/shutdown lifecycle of
    ``StreamProcessor`` against mocked consumer and strategy objects."""
    topic = Topic("topic")

    consumer = mock.Mock()
    strategy = mock.Mock()
    factory = mock.Mock()
    factory.create.return_value = strategy

    metrics = TestingMetricsBackend()

    # Construction subscribes the consumer exactly once.
    with assert_changes(lambda: consumer.subscribe.call_count, 0, 1):
        processor: StreamProcessor[int] = StreamProcessor(
            consumer, topic, factory, metrics
        )

    # The processor should accept heartbeat messages without an assignment or
    # active processor.
    consumer.poll.return_value = None
    processor._run_once()

    message = Message(Partition(topic, 0), 0, 0, datetime.now())

    # XXX: ``call().args``, ``call().kwargs`` are not available until 3.8
    subscribe_args, subscribe_kwargs = consumer.subscribe.call_args
    assert subscribe_args[0] == [topic]

    assignment_callback = subscribe_kwargs["on_assign"]
    revocation_callback = subscribe_kwargs["on_revoke"]

    # Assignment should succeed if no assignment already exists.
    offsets = {Partition(topic, 0): 0}
    assignment_callback(offsets)

    # If ``Consumer.poll`` doesn't return a message, we should poll the
    # processing strategy, but not submit anything for processing.
    consumer.poll.return_value = None
    with assert_changes(
        lambda: strategy.poll.call_count, 0, 1
    ), assert_does_not_change(lambda: strategy.submit.call_count, 0):
        processor._run_once()

    # If ``Consumer.poll`` **does** return a message, we should poll the
    # processing strategy and submit the message for processing.
    consumer.poll.return_value = message
    with assert_changes(lambda: strategy.poll.call_count, 1, 2), assert_changes(
        lambda: strategy.submit.call_count, 0, 1
    ):
        processor._run_once()
        assert strategy.submit.call_args_list[-1] == mock.call(message)

    # If the message is rejected by the processing strategy, the consumer
    # should be paused and the message should be held for later.
    consumer.tell.return_value = offsets
    consumer.poll.return_value = message
    strategy.submit.side_effect = MessageRejected()
    with assert_changes(lambda: consumer.pause.call_count, 0, 1):
        processor._run_once()
        assert strategy.submit.call_args_list[-1] == mock.call(message)

    # If ``Consumer.poll`` returns a message when we expect it to be paused,
    # we should raise an exception.
    with pytest.raises(InvalidStateError):
        processor._run_once()

    # Once the message is accepted by the processing strategy, the consumer
    # should be resumed.
    consumer.poll.return_value = None
    strategy.submit.return_value = None
    strategy.submit.side_effect = None
    with assert_changes(lambda: consumer.resume.call_count, 0, 1):
        processor._run_once()
        assert strategy.submit.call_args_list[-1] == mock.call(message)

    # The pause above must have been reported to the metrics backend.
    metric = metrics.calls[0]
    assert isinstance(metric, Timing)
    assert metric.name == "pause_duration_ms"

    # Assignment should fail if one already exists.
    with pytest.raises(InvalidStateError):
        assignment_callback({Partition(topic, 0): 0})

    # Revocation should succeed with an active assignment, and cause the
    # strategy instance to be closed.
    with assert_changes(lambda: strategy.close.call_count, 0, 1):
        revocation_callback([Partition(topic, 0)])

    # Revocation should fail without an active assignment.
    with pytest.raises(InvalidStateError):
        revocation_callback([Partition(topic, 0)])

    # The processor should not accept non-heartbeat messages without an
    # assignment or active processor.
    consumer.poll.return_value = message
    with pytest.raises(InvalidStateError):
        processor._run_once()

    # Shutdown closes the underlying consumer.
    with assert_changes(lambda: consumer.close.call_count, 0, 1):
        processor._shutdown()
def test_tick_consumer_non_monotonic() -> None:
    """A message whose timestamp moves backwards is dropped; the tick interval
    only advances once timestamps become monotonic again."""
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock(epoch.timestamp())
    broker: DummyBroker[int] = DummyBroker(clock)
    broker.create_topic(topic, partitions=1)

    producer: DummyProducer[int] = DummyProducer(broker)

    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")
    consumer = TickConsumer(inner_consumer)

    consumer.subscribe([topic])

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    # The first message only primes the interval; no tick is emitted.
    with assert_changes(
        inner_consumer.tell, {partition: 0}, {partition: 1}
    ), assert_does_not_change(consumer.tell, {partition: 0}):
        assert consumer.poll() is None

    # The second message completes the (0, 1) interval and yields a tick.
    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    # Rewind the clock so the next message's timestamp goes backwards.
    clock.sleep(-1)

    producer.produce(partition, 2)

    # The non-monotonic message is skipped: the inner consumer advances, but
    # the tick consumer's position does not move and no tick is produced.
    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    # Once time moves forward again, the tick spans the skipped offsets (1, 3).
    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )