Example #1
def _batch_message_set_up(next_step: Mock,
                          max_batch_time: float = 100.0,
                          max_batch_size: int = 2):
    # batch time is in seconds
    batch_messages_step = BatchMessages(next_step=next_step,
                                        max_batch_time=max_batch_time,
                                        max_batch_size=max_batch_size)

    message1 = Message(Partition(Topic("topic"), 0), 1,
                       KafkaPayload(None, b"some value", []), datetime.now())
    message2 = Message(Partition(Topic("topic"), 0), 2,
                       KafkaPayload(None, b"another value", []),
                       datetime.now())
    return (batch_messages_step, message1, message2)
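A note on what this fixture sets up: BatchMessages is expected to buffer submitted messages and forward the whole batch to next_step once either max_batch_size messages have arrived or max_batch_time seconds have passed. The stand-in below is a minimal sketch of that contract, not the real BatchMessages; every name in it is hypothetical.

# Minimal sketch of the batching contract (hypothetical, not the real BatchMessages).
import time
from typing import Any, List, Optional


class BatchStepSketch:
    def __init__(self, next_step: Any, max_batch_time: float, max_batch_size: int) -> None:
        self.next_step = next_step
        self.max_batch_time = max_batch_time  # seconds, as in the fixture above
        self.max_batch_size = max_batch_size
        self.batch: List[Any] = []
        self.batch_start: Optional[float] = None

    def submit(self, message: Any) -> None:
        if self.batch_start is None:
            self.batch_start = time.time()
        self.batch.append(message)
        if (len(self.batch) >= self.max_batch_size
                or time.time() - self.batch_start >= self.max_batch_time):
            self.next_step.submit(self.batch)  # flush the whole batch downstream
            self.batch = []
            self.batch_start = None

Under this contract, the fixture's defaults (max_batch_size=2) mean the second message triggers the flush, which a size-based test can assert with next_step.submit.assert_called_once().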
Example #2
def test_process_messages(mock_indexer, mock_task) -> None:
    message_payloads = [counter_payload, distribution_payload, set_payload]
    message_batch = [
        Message(
            Partition(Topic("topic"), 0),
            i + 1,
            KafkaPayload(None,
                         json.dumps(payload).encode("utf-8"), []),
            datetime.now(),
        ) for i, payload in enumerate(message_payloads)
    ]
    # the outer message uses the last message's partition, offset, and timestamp
    last = message_batch[-1]
    outer_message = Message(last.partition, last.offset, message_batch,
                            last.timestamp)

    new_batch = process_messages(outer_message=outer_message)
    expected_new_batch = [
        Message(
            m.partition,
            m.offset,
            KafkaPayload(
                None,
                json.dumps(__translated_payload(
                    message_payloads[i])).encode("utf-8"),
                [],
            ),
            m.timestamp,
        ) for i, m in enumerate(message_batch)
    ]

    assert new_batch == expected_new_batch
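The outer-message wrapping above (and again in Example #5) is worth spelling out: a batch of messages becomes the payload of one Message that borrows the last inner message's partition, offset, and timestamp, so committing the outer message covers everything in the batch. A small helper makes the pattern explicit; wrap_batch and the MessageT tuple are hypothetical stand-ins mirroring the positional (partition, offset, payload, timestamp) constructor used in these examples.

# Hypothetical helper for the outer-message pattern (stand-in types, not arroyo's).
from datetime import datetime
from typing import Any, List, NamedTuple


class MessageT(NamedTuple):
    partition: Any
    offset: int
    payload: Any
    timestamp: datetime


def wrap_batch(batch: List[MessageT]) -> MessageT:
    # Borrow the last inner message's position metadata: once the wrapper's
    # offset is committed, every inner message at or before it is covered.
    last = batch[-1]
    return MessageT(last.partition, last.offset, batch, last.timestamp)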
Example #3
def test_mock_consumer() -> None:
    storage = get_writable_storage(StorageKey.ERRORS)

    strategy = KafkaConsumerStrategyFactory(
        None,
        lambda message: None,
        build_mock_batch_writer(storage, True, TestingMetricsBackend(), 100,
                                50),
        max_batch_size=1,
        max_batch_time=1,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        initialize_parallel_transform=None,
    ).create(lambda message: None)

    strategy.submit(
        Message(
            Partition(Topic("events"), 0),
            1,
            KafkaPayload(None, b"INVALID MESSAGE", []),
            datetime.now(),
        ))
    strategy.close()
    strategy.join()

    # If the mock was not applied correctly we would have data in Clickhouse
    reader = storage.get_cluster().get_reader()
    result = reader.execute(
        FormattedQuery([StringNode("SELECT count() as c from errors_local")]))
    assert result["data"] == [{"c": 0}]
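What makes this test pass is that build_mock_batch_writer replaces the real ClickHouse batch writer with one that accepts writes but persists nothing; the count() query then proves the mock was actually wired into the strategy. The snippet below sketches that general pattern with nothing but unittest.mock; build_noop_writer is a hypothetical name.

# Sketch of the no-op writer pattern (hypothetical helper, stdlib only).
from unittest.mock import Mock


def build_noop_writer() -> Mock:
    writer = Mock()
    writer.write.return_value = None  # accept writes, persist nothing
    return writer


writer = build_noop_writer()
writer.write([b"INVALID MESSAGE"])
writer.write.assert_called_once()  # the write was observed but nothing was persisted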
Example #4
def test_metrics_batch_builder():
    max_batch_time = 3.0  # seconds
    max_batch_size = 2

    # 1. Ready when max_batch_size is reached
    batch_builder_size = MetricsBatchBuilder(max_batch_size=max_batch_size,
                                             max_batch_time=max_batch_time)

    assert not batch_builder_size.ready()

    message1 = Message(Partition(Topic("topic"), 0), 1,
                       KafkaPayload(None, b"some value", []), datetime.now())
    batch_builder_size.append(message1)
    assert not batch_builder_size.ready()

    message2 = Message(Partition(Topic("topic"), 0), 2,
                       KafkaPayload(None, b"another value", []),
                       datetime.now())
    batch_builder_size.append(message2)
    assert batch_builder_size.ready()

    # 2. Ready when max_batch_time is reached
    batch_builder_time = MetricsBatchBuilder(max_batch_size=max_batch_size,
                                             max_batch_time=max_batch_time)

    assert not batch_builder_time.ready()

    message1 = Message(Partition(Topic("topic"), 0), 1,
                       KafkaPayload(None, b"some value", []), datetime.now())
    batch_builder_time.append(message1)
    assert not batch_builder_time.ready()

    time.sleep(3)  # wait out max_batch_time (3 seconds) so the batch becomes ready
    assert batch_builder_time.ready()

    # 3. Adding the same message twice to the same batch
    batch_builder_time = MetricsBatchBuilder(max_batch_size=max_batch_size,
                                             max_batch_time=max_batch_time)
    message1 = Message(Partition(Topic("topic"), 0), 1,
                       KafkaPayload(None, b"some value", []), datetime.now())
    batch_builder_time.append(message1)
    with pytest.raises(DuplicateMessage):
        batch_builder_time.append(message1)
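Taken together, the three scenarios pin down the MetricsBatchBuilder contract: ready() flips on batch size, ready() flips once max_batch_time has elapsed, and appending the same message twice raises DuplicateMessage. Below is a self-contained sketch of that contract; the class and exception names are hypothetical, and duplicate detection by (partition, offset) is an assumption about how the real builder identifies messages.

# Hypothetical sketch of the contract exercised by test_metrics_batch_builder.
import time
from typing import Any, List, Optional, Set, Tuple


class DuplicateMessageSketch(Exception):
    pass


class BatchBuilderSketch:
    def __init__(self, max_batch_size: int, max_batch_time: float) -> None:
        self.max_batch_size = max_batch_size
        self.max_batch_time = max_batch_time  # seconds
        self.messages: List[Any] = []
        self.seen: Set[Tuple[Any, int]] = set()
        self.first_append: Optional[float] = None

    def append(self, message: Any) -> None:
        key = (message.partition, message.offset)
        if key in self.seen:
            raise DuplicateMessageSketch(f"already batched: {key}")
        self.seen.add(key)
        if self.first_append is None:
            self.first_append = time.time()
        self.messages.append(message)

    def ready(self) -> bool:
        if not self.messages:
            return False
        if len(self.messages) >= self.max_batch_size:
            return True
        return time.time() - self.first_append >= self.max_batch_time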
Example #5
def test_produce_step() -> None:
    topic = Topic("snuba-metrics")
    partition = Partition(topic, 0)

    clock = Clock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    commit = Mock()

    produce_step = ProduceStep(commit_function=commit, producer=producer)

    message_payloads = [counter_payload, distribution_payload, set_payload]
    message_batch = [
        Message(
            Partition(Topic("topic"), 0),
            i + 1,
            KafkaPayload(
                None,
                json.dumps(__translated_payload(payload)).encode("utf-8"), []),
            datetime.now(),
        ) for i, payload in enumerate(message_payloads)
    ]
    # the outer message uses the last message's partition, offset, and timestamp
    last = message_batch[-1]
    outer_message = Message(last.partition, last.offset, message_batch,
                            last.timestamp)

    # 1. Submit the message (that would have been generated from process_messages)
    produce_step.submit(outer_message=outer_message)

    # 2. Check that submit created the same number of futures as
    #    messages in the outer_message (3 in this test). Also check
    #    that the produced message payloads are as expected.
    assert len(produce_step._ProduceStep__futures) == 3

    first_message = broker_storage.consume(partition, 0)
    assert first_message is not None

    second_message = broker_storage.consume(partition, 1)
    assert second_message is not None

    third_message = broker_storage.consume(partition, 2)
    assert third_message is not None

    assert broker_storage.consume(partition, 3) is None

    produced_messages = [
        json.loads(msg.payload.value.decode("utf-8"), use_rapid_json=True)
        for msg in [first_message, second_message, third_message]
    ]
    expected_produced_messages = []
    for payload in message_payloads:
        translated = __translated_payload(payload)
        tags: Mapping[str, int] = {
            str(k): v
            for k, v in translated["tags"].items()
        }
        translated["tags"] = tags
        expected_produced_messages.append(translated)

    assert produced_messages == expected_produced_messages

    # 3. Call poll method, and check that doing so checked that
    #    futures were ready and successful and therefore messages
    #    were committed.
    produce_step.poll()
    expected_commit_calls = [
        call({message.partition: Position(message.offset, message.timestamp)})
        for message in message_batch
    ]
    assert commit.call_args_list == expected_commit_calls

    produce_step.close()
    produce_step.join()
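
The poll/commit assertions at the end are the heart of ProduceStep: submit() starts an asynchronous produce and stores the resulting future, and poll() later walks completed futures in submission order, committing each finished message's position. A hedged sketch of that loop follows; the class is hypothetical and uses concurrent.futures where the real step tracks the producer's delivery futures.

# Hypothetical sketch of the future-tracking commit loop in a produce step.
from collections import deque
from concurrent.futures import Future
from typing import Any, Callable, Deque, Dict, Tuple


class ProduceStepSketch:
    def __init__(self, commit: Callable[[Dict[Any, Tuple[int, Any]]], None]) -> None:
        self.commit = commit
        self.futures: Deque[Tuple[Any, Future]] = deque()

    def submit(self, message: Any, future: Future) -> None:
        # In the real step the future comes from producer.produce(...).
        self.futures.append((message, future))

    def poll(self) -> None:
        # Commit in submission order; stop at the first unfinished produce so
        # an offset is only committed after its produce has succeeded.
        while self.futures and self.futures[0][1].done():
            message, future = self.futures.popleft()
            future.result()  # re-raise if the produce failed
            self.commit({message.partition: (message.offset, message.timestamp)})

Stopping at the first pending future is what preserves at-least-once semantics: offsets committed by poll() always lie behind the produces that have already completed, so a crash before poll() only means those messages are produced again after a restart.
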
def invalid_message() -> Message[KafkaPayload]:
    invalid_payload = KafkaPayload(None, b"", [])
    return Message(Partition(Topic(""), 0), 0, invalid_payload, datetime.now())
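A fixture like invalid_message is typically pushed through a strategy to exercise its error path, much as Example #3 submits b"INVALID MESSAGE" directly. A hedged usage sketch, where `strategy` is a hypothetical fixture and the concrete exception type depends on the consumer under test:

def test_rejects_invalid_message(strategy) -> None:
    # Hypothetical: assumes the strategy surfaces a processing error on submit.
    with pytest.raises(Exception):
        strategy.submit(invalid_message())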