def _batch_message_set_up(
    next_step: Mock, max_batch_time: float = 100.0, max_batch_size: int = 2
):
    # batch time is in seconds
    batch_messages_step = BatchMessages(
        next_step=next_step,
        max_batch_time=max_batch_time,
        max_batch_size=max_batch_size,
    )

    message1 = Message(
        Partition(Topic("topic"), 0),
        1,
        KafkaPayload(None, b"some value", []),
        datetime.now(),
    )
    message2 = Message(
        Partition(Topic("topic"), 0),
        2,
        KafkaPayload(None, b"another value", []),
        datetime.now(),
    )

    return (batch_messages_step, message1, message2)
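
# A hedged usage sketch (not part of the original suite): with the defaults above,
# max_batch_size=2, so submitting two messages should saturate the batch. The exact
# flush behaviour of BatchMessages (handing one batched message to next_step.submit
# on the saturating submit) is an assumption here, not confirmed by this file, and
# the test name is hypothetical.
def test_batch_messages_size_flush_sketch() -> None:
    next_step = Mock()
    batch_messages_step, message1, message2 = _batch_message_set_up(next_step)

    # first message starts a new batch; nothing is flushed downstream yet
    batch_messages_step.submit(message=message1)
    assert next_step.submit.call_count == 0

    # second message reaches max_batch_size, so the batch should be flushed (assumption)
    batch_messages_step.submit(message=message2)
    assert next_step.submit.call_count == 1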
def test_process_messages(mock_indexer, mock_task) -> None:
    message_payloads = [counter_payload, distribution_payload, set_payload]
    message_batch = [
        Message(
            Partition(Topic("topic"), 0),
            i + 1,
            KafkaPayload(None, json.dumps(payload).encode("utf-8"), []),
            datetime.now(),
        )
        for i, payload in enumerate(message_payloads)
    ]
    # the outer message uses the last message's partition, offset, and timestamp
    last = message_batch[-1]
    outer_message = Message(last.partition, last.offset, message_batch, last.timestamp)

    new_batch = process_messages(outer_message=outer_message)
    expected_new_batch = [
        Message(
            m.partition,
            m.offset,
            KafkaPayload(
                None,
                json.dumps(__translated_payload(message_payloads[i])).encode("utf-8"),
                [],
            ),
            m.timestamp,
        )
        for i, m in enumerate(message_batch)
    ]

    assert new_batch == expected_new_batch
def test_mock_consumer() -> None:
    storage = get_writable_storage(StorageKey.ERRORS)

    strategy = KafkaConsumerStrategyFactory(
        None,
        lambda message: None,
        build_mock_batch_writer(storage, True, TestingMetricsBackend(), 100, 50),
        max_batch_size=1,
        max_batch_time=1,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        initialize_parallel_transform=None,
    ).create(lambda message: None)

    strategy.submit(
        Message(
            Partition(Topic("events"), 0),
            1,
            KafkaPayload(None, b"INVALID MESSAGE", []),
            datetime.now(),
        )
    )
    strategy.close()
    strategy.join()

    # If the mock was not applied correctly we would have data in Clickhouse
    reader = storage.get_cluster().get_reader()
    result = reader.execute(
        FormattedQuery([StringNode("SELECT count() as c from errors_local")])
    )
    assert result["data"] == [{"c": 0}]
def test_metrics_batch_builder():
    max_batch_time = 3.0  # seconds
    max_batch_size = 2

    # 1. Ready when max_batch_size is reached
    batch_builder_size = MetricsBatchBuilder(
        max_batch_size=max_batch_size, max_batch_time=max_batch_time
    )
    assert not batch_builder_size.ready()

    message1 = Message(
        Partition(Topic("topic"), 0),
        1,
        KafkaPayload(None, b"some value", []),
        datetime.now(),
    )
    batch_builder_size.append(message1)
    assert not batch_builder_size.ready()

    message2 = Message(
        Partition(Topic("topic"), 0),
        2,
        KafkaPayload(None, b"another value", []),
        datetime.now(),
    )
    batch_builder_size.append(message2)
    assert batch_builder_size.ready()

    # 2. Ready when max_batch_time is reached
    batch_builder_time = MetricsBatchBuilder(
        max_batch_size=max_batch_size, max_batch_time=max_batch_time
    )
    assert not batch_builder_time.ready()

    message1 = Message(
        Partition(Topic("topic"), 0),
        1,
        KafkaPayload(None, b"some value", []),
        datetime.now(),
    )
    batch_builder_time.append(message1)
    assert not batch_builder_time.ready()

    time.sleep(3)
    assert batch_builder_time.ready()

    # 3. Adding the same message twice to the same batch
    batch_builder_time = MetricsBatchBuilder(
        max_batch_size=max_batch_size, max_batch_time=max_batch_time
    )
    message1 = Message(
        Partition(Topic("topic"), 0),
        1,
        KafkaPayload(None, b"some value", []),
        datetime.now(),
    )
    batch_builder_time.append(message1)
    with pytest.raises(DuplicateMessage):
        batch_builder_time.append(message1)
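
# A minimal sketch of the batch-builder contract exercised above, under stated
# assumptions: it becomes ready once max_batch_size messages are appended or
# max_batch_time seconds have elapsed, and rejects a message whose offset is
# already in the batch. _SketchBatchBuilder and its internals are hypothetical
# and not the production MetricsBatchBuilder implementation.
class _SketchBatchBuilder:
    def __init__(self, max_batch_size: int, max_batch_time: float) -> None:
        self.__max_batch_size = max_batch_size
        # deadline measured from construction time, in seconds
        self.__deadline = time.time() + max_batch_time
        self.__offsets = set()
        self.messages = []

    def append(self, message: Message) -> None:
        # assumption: duplicates are detected by offset within one batch
        if message.offset in self.__offsets:
            raise DuplicateMessage
        self.__offsets.add(message.offset)
        self.messages.append(message)

    def ready(self) -> bool:
        return (
            len(self.messages) >= self.__max_batch_size
            or time.time() >= self.__deadline
        )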
def test_produce_step() -> None:
    topic = Topic("snuba-metrics")
    partition = Partition(topic, 0)

    clock = Clock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    commit = Mock()
    produce_step = ProduceStep(commit_function=commit, producer=producer)

    message_payloads = [counter_payload, distribution_payload, set_payload]
    message_batch = [
        Message(
            Partition(Topic("topic"), 0),
            i + 1,
            KafkaPayload(
                None,
                json.dumps(__translated_payload(message_payloads[i])).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )
        for i, payload in enumerate(message_payloads)
    ]
    # the outer message uses the last message's partition, offset, and timestamp
    last = message_batch[-1]
    outer_message = Message(last.partition, last.offset, message_batch, last.timestamp)

    # 1. Submit the message (that would have been generated from process_messages)
    produce_step.submit(outer_message=outer_message)

    # 2. Check that submit created the same number of futures as
    #    messages in the outer_message (3 in this test). Also check
    #    that the produced message payloads are as expected.
    assert len(produce_step._ProduceStep__futures) == 3

    first_message = broker_storage.consume(partition, 0)
    assert first_message is not None
    second_message = broker_storage.consume(partition, 1)
    assert second_message is not None
    third_message = broker_storage.consume(partition, 2)
    assert third_message is not None

    assert broker_storage.consume(partition, 3) is None

    produced_messages = [
        json.loads(msg.payload.value.decode("utf-8"), use_rapid_json=True)
        for msg in [first_message, second_message, third_message]
    ]
    expected_produced_messages = []
    for payload in message_payloads:
        translated = __translated_payload(payload)
        tags: Mapping[str, int] = {str(k): v for k, v in translated["tags"].items()}
        translated.update(**{"tags": tags})
        expected_produced_messages.append(translated)

    assert produced_messages == expected_produced_messages

    # 3. Call poll method, and check that doing so checked that
    #    futures were ready and successful and therefore messages
    #    were committed.
    produce_step.poll()
    expected_commit_calls = [
        call({message.partition: Position(message.offset, message.timestamp)})
        for message in message_batch
    ]
    assert commit.call_args_list == expected_commit_calls

    produce_step.close()
    produce_step.join()
def invalid_message() -> Message[KafkaPayload]:
    invalid_payload = KafkaPayload(None, b"", [])
    return Message(Partition(Topic(""), 0), 0, invalid_payload, datetime.now())