Example #1
def test_encoding_producer() -> None:
    broker: Broker[str] = Broker(MemoryMessageStorage(), TestingClock())

    topic = Topic("test")
    broker.create_topic(topic, 1)

    class ReverseEncoder(Encoder[str, str]):
        def encode(self, value: str) -> str:
            return "".join(value[::-1])

    producer = ProducerEncodingWrapper(broker.get_producer(), ReverseEncoder())
    decoded_message = producer.produce(topic, "hello").result()
    assert decoded_message.payload == "hello"

    consumer = broker.get_consumer("group")
    consumer.subscribe([topic])

    encoded_message = consumer.poll()
    assert encoded_message is not None

    # The payload returned by the consumer should not be decoded.
    assert encoded_message.payload == "olleh"

    # All other attributes should be the same.
    for attribute in set(Message.__slots__) - {"payload"}:
        assert getattr(encoded_message, attribute) == getattr(
            decoded_message, attribute)
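
These snippets omit their imports. A minimal block that makes this first example self-contained, assuming arroyo's in-memory test backend and snuba's encoding helpers (the exact module paths are assumptions and vary between arroyo/snuba versions):

from arroyo.backends.local.backend import LocalBroker as Broker
from arroyo.backends.local.storages.memory import MemoryMessageStorage
from arroyo.types import Message, Topic
from arroyo.utils.clock import TestingClock
from snuba.utils.codecs import Encoder
from snuba.utils.streams.encoding import ProducerEncodingWrapper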
Example #2
def test_tick_consumer_min_interval() -> None:
    clock = TestingClock()
    broker: Broker[int] = Broker(MemoryMessageStorage(), clock)

    topic = Topic("messages")

    broker.create_topic(topic, partitions=2)

    producer = broker.get_producer()
    for payload in range(3):
        producer.produce(Partition(topic, 0), payload).result()
        clock.sleep(1.0)

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer, min_interval=timedelta(seconds=2))

    consumer.subscribe([topic])

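    # The first poll has no earlier message to close an interval, and the
    # second spans only one second, below min_interval, so both return None.
    # The third poll emits a single merged tick covering the full two seconds.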
    assert consumer.poll() is None
    assert consumer.poll() is None
    message = consumer.poll()
    assert message is not None
    tick = message.payload
    assert tick.offsets.upper - tick.offsets.lower == 2
    assert tick.timestamps.upper - tick.timestamps.lower == timedelta(seconds=2)
Example #3
def test_subscription_worker_consistent(
        subscription_data: SubscriptionData) -> None:
    state.set_config("event_subscription_non_consistent_sample_rate", 1)
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 1

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = TestingMetricsBackend()

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(
                store, PartitionId(0), timedelta(),
                DummyMetricsBackend(strict=True)
            )
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now))

    # Give the worker's executor threads a moment to finish the queries.
    time.sleep(0.1)

    consistent_increments = [
        m for m in metrics.calls
        if isinstance(m, Increment) and m.name == "consistent"
    ]
    assert len(consistent_increments) == 1
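
DummySubscriptionDataStore is a snuba test helper not shown here. A minimal in-memory sketch, assuming the SubscriptionDataStore interface is just create/delete/all (method names inferred from how these tests use the store):

from typing import Dict, Iterable, Tuple
from uuid import UUID

class DummySubscriptionDataStore(SubscriptionDataStore):
    def __init__(self) -> None:
        self.__subscriptions: Dict[UUID, SubscriptionData] = {}

    def create(self, key: UUID, data: SubscriptionData) -> None:
        self.__subscriptions[key] = data

    def delete(self, key: UUID) -> None:
        self.__subscriptions.pop(key, None)

    def all(self) -> Iterable[Tuple[UUID, SubscriptionData]]:
        return list(self.__subscriptions.items())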
Example #4
def test_execute_and_produce_result() -> None:
    state.set_config("subscription_mode_events", "new")
    dataset = get_dataset("events")
    entity_names = ["events"]
    max_concurrent_queries = 2
    total_concurrent_queries = 2
    metrics = TestingMetricsBackend()

    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    clock = TestingClock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(scheduled_topic, partitions=1)
    broker.create_topic(result_topic, partitions=1)
    producer = broker.get_producer()

    commit = mock.Mock()

    strategy = ExecuteQuery(
        dataset,
        entity_names,
        max_concurrent_queries,
        total_concurrent_queries,
        None,
        metrics,
        ProduceResult(producer, result_topic.name, commit),
        commit,
    )

    subscription_identifier = SubscriptionIdentifier(PartitionId(0),
                                                     uuid.uuid1())

    make_message = generate_message(EntityKey.EVENTS, subscription_identifier)
    message = next(make_message)
    strategy.submit(message)

    # Eventually a message should be produced and offsets committed
    while (broker_storage.consume(Partition(result_topic, 0), 0) is None
           or commit.call_count == 0):
        strategy.poll()

    produced_message = broker_storage.consume(Partition(result_topic, 0), 0)
    assert produced_message is not None
    assert produced_message.payload.key == str(subscription_identifier).encode(
        "utf-8")
    assert commit.call_count == 1
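
ExecuteQuery and ProduceResult are arroyo processing strategies, and the poll loop above stands in for the stream processor that normally drives them. A rough sketch of that lifecycle (an assumption about the surrounding framework, not part of the test):

strategy.submit(message)  # hand the next message to the strategy
strategy.poll()           # called repeatedly; completes futures, triggers commits
strategy.close()          # stop accepting new messages
strategy.join()           # flush and wait for in-flight work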
Example #5
def test_skip_stale_message() -> None:
    dataset = get_dataset("events")
    entity_names = ["events"]
    max_concurrent_queries = 2
    total_concurrent_queries = 2
    metrics = TestingMetricsBackend()

    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    clock = TestingClock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(scheduled_topic, partitions=1)
    broker.create_topic(result_topic, partitions=1)
    producer = broker.get_producer()

    commit = mock.Mock()

    stale_threshold_seconds = 60

    strategy = ExecuteQuery(
        dataset,
        entity_names,
        max_concurrent_queries,
        total_concurrent_queries,
        stale_threshold_seconds,
        metrics,
        ProduceResult(producer, result_topic.name, commit),
        commit,
    )

    subscription_identifier = SubscriptionIdentifier(PartitionId(0),
                                                     uuid.uuid1())

    make_message = generate_message(EntityKey.EVENTS, subscription_identifier)
    message = next(make_message)
    strategy.submit(message)

    # No message should be produced: the scheduled task is older than
    # stale_threshold_seconds, so execution is skipped and the
    # "skipped_execution" metric is incremented instead.
    strategy.poll()
    assert broker_storage.consume(Partition(result_topic, 0), 0) is None
    assert Increment("skipped_execution", 1,
                     {"entity": "events"}) in metrics.calls
Example #6
def test_invalid_commit_log_message(caplog: Any) -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    topic = Topic("messages")
    followed_consumer_group = "events"
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    # Produce an invalid payload to the commit log topic (the key must not be
    # None).
    producer.produce(
        partition,
        KafkaPayload(None, b"some-value", []),
    ).result()

    clock.sleep(1)

    with caplog.at_level(logging.ERROR):
        assert consumer.poll() is None

    assert followed_consumer_group in caplog.text
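
For contrast, a decodable commit log entry goes through commit_codec, as the following examples show. A sketch of a valid payload for the same partition, using the current clock time as the commit timestamp:

producer.produce(
    partition,
    commit_codec.encode(
        Commit(followed_consumer_group, partition, 0,
               datetime.fromtimestamp(clock.time()))),
).result()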
Example #7
def test_tick_consumer_non_monotonic() -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 0, epoch)),
    ).result()

    clock.sleep(1)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 1,
                   epoch + timedelta(seconds=1))),
    ).result()

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert consumer.tell() == {partition: 1}

    with assert_changes(consumer.tell, {partition: 1}, {partition: 2}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                0,
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)
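    # The clock has moved backwards: the next commit's timestamp precedes the
    # previous tick, so the non-monotonic interval is discarded (poll() returns
    # None below) even though the consumer's position still advances.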

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 2, epoch)),
    ).result()

    with assert_changes(consumer.tell, {partition: 2}, {partition: 3}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 3,
                   epoch + timedelta(seconds=2))),
    ).result()

    with assert_changes(consumer.tell, {partition: 3}, {partition: 4}):
        assert consumer.poll() == Message(
            partition,
            3,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(epoch + timedelta(seconds=1),
                                    epoch + timedelta(seconds=2)),
            ),
            epoch + timedelta(seconds=2),
        )
Example #8
def test_tick_consumer(time_shift: Optional[timedelta]) -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    # Commits referencing two different partitions are all produced to
    # partition 0, the commit log's only partition.
    for partition, offsets in enumerate([[0, 1, 2], [0]]):
        for offset in offsets:
            payload = commit_codec.encode(
                Commit(followed_consumer_group, Partition(topic, partition),
                       offset, epoch))
            producer.produce(Partition(topic, 0), payload).result()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer,
                                     followed_consumer_group,
                                     time_shift=time_shift)

    if time_shift is None:
        time_shift = timedelta()

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert consumer.tell() == {
            Partition(topic, 0): 0,
        }

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(0, offsets=Interval(0, 1),
             timestamps=Interval(epoch, epoch)).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(0, offsets=Interval(1, 2),
             timestamps=Interval(epoch, epoch)).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 3,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 4,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 4,
    }

    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(0, offsets=Interval(1, 2),
             timestamps=Interval(epoch, epoch)).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 3,
    }

    # Seeking to a partition that was never assigned raises ConsumerError.
    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
Example #9
def test_produce_step() -> None:
    topic = Topic("snuba-metrics")
    partition = Partition(topic, 0)

    clock = TestingClock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    commit = Mock()

    produce_step = ProduceStep(commit_function=commit, producer=producer)

    message_payloads = [counter_payload, distribution_payload, set_payload]
    message_batch = [
        Message(
            Partition(Topic("topic"), 0),
            i + 1,
            KafkaPayload(
                None,
                json.dumps(__translated_payload(payload)).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )
        for i, payload in enumerate(message_payloads)
    ]
    # the outer message uses the last message's partition, offset, and timestamp
    last = message_batch[-1]
    outer_message = Message(last.partition, last.offset, message_batch,
                            last.timestamp)

    # 1. Submit the message (that would have been generated from process_messages)
    produce_step.submit(outer_message=outer_message)

    # 2. Check that submit created the same number of futures as
    #    messages in the outer_message (3 in this test). Also check
    #    that the produced message payloads are as expected.
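    #    (``_ProduceStep__futures`` reaches the strategy's private
    #    ``__futures`` attribute via Python name mangling.)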
    assert len(produce_step._ProduceStep__futures) == 3

    first_message = broker_storage.consume(partition, 0)
    assert first_message is not None

    second_message = broker_storage.consume(partition, 1)
    assert second_message is not None

    third_message = broker_storage.consume(partition, 2)
    assert third_message is not None

    assert broker_storage.consume(partition, 3) is None

    # Note: ``json`` here must be a rapidjson-backed wrapper (presumably
    # sentry's ``sentry.utils.json``); the stdlib ``json.loads`` accepts no
    # ``use_rapid_json`` argument.
    produced_messages = [
        json.loads(msg.payload.value.decode("utf-8"), use_rapid_json=True)
        for msg in [first_message, second_message, third_message]
    ]
    expected_produced_messages = []
    for payload in message_payloads:
        translated = __translated_payload(payload)
        tags: Mapping[str, int] = {
            str(k): v for k, v in translated["tags"].items()
        }
        translated["tags"] = tags
        expected_produced_messages.append(translated)

    assert produced_messages == expected_produced_messages

    # 3. Call poll method, and check that doing so checked that
    #    futures were ready and successful and therefore messages
    #    were committed.
    produce_step.poll()
    expected_commit_calls = [
        call({message.partition: Position(message.offset, message.timestamp)})
        for message in message_batch
    ]
    assert commit.call_args_list == expected_commit_calls

    produce_step.close()
    produce_step.join()
Example #10
def test_tick_consumer_non_monotonic() -> None:
    clock = TestingClock()
    broker: Broker[int] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert inner_consumer.tell() == {partition: 1}
    assert consumer.tell() == {partition: 0}
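
    # A tick spans two consecutive messages, so the TickConsumer's position
    # lags the inner consumer by one: the committed offset points at the first
    # message of the still-open interval.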

    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )
Example #11
def test_subscription_worker(subscription_data: SubscriptionData) -> None:
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(store, PartitionId(0), timedelta(),
                                     metrics)
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    result_futures = worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now))

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    for i in range(evaluations):
        timestamp = now - frequency * (evaluations - i)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        task, future = result_futures[i]
        future_result = future.result()
        request, result = future_result
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        from_pattern = FunctionCall(
            String(ConditionFunctions.GTE),
            (
                Column(None, String("timestamp")),
                Literal(Datetime(timestamp - subscription.data.time_window)),
            ),
        )
        to_pattern = FunctionCall(
            String(ConditionFunctions.LT),
            (Column(None, String("timestamp")), Literal(Datetime(timestamp))),
        )
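        # from_pattern and to_pattern structurally match the AST of the
        # conditions ``timestamp >= <window start>`` and
        # ``timestamp < <window end>``.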

        condition = request.query.get_condition()
        assert condition is not None

        conditions = get_first_level_and_conditions(condition)

        assert any(from_pattern.match(e) for e in conditions)
        assert any(to_pattern.match(e) for e in conditions)

        assert result == {
            "meta": [{"name": "count", "type": "UInt64"}],
            "data": [{"count": 0}],
        }
Example #12
def test_produce_result() -> None:
    state.set_config("subscription_mode_events", "new")
    epoch = datetime(1970, 1, 1)
    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    clock = TestingClock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(scheduled_topic, partitions=1)
    broker.create_topic(result_topic, partitions=1)

    producer = broker.get_producer()
    commit = mock.Mock()

    strategy = ProduceResult(producer, result_topic.name, commit)

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid.uuid1()),
        subscription_data)

    request = subscription_data.build_request(get_dataset("events"), epoch,
                                              None, Timer("timer"))
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    message = Message(
        Partition(scheduled_topic, 0),
        1,
        SubscriptionTaskResult(
            ScheduledSubscriptionTask(
                epoch,
                SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 1),
            ),
            (request, result),
        ),
        epoch,
    )

    strategy.submit(message)

    produced_message = broker_storage.consume(Partition(result_topic, 0), 0)
    assert produced_message is not None
    assert produced_message.payload.key == str(
        subscription.identifier).encode("utf-8")
    assert broker_storage.consume(Partition(result_topic, 0), 1) is None
    assert commit.call_count == 0
    strategy.poll()
    assert commit.call_count == 1

    # Commits are throttled, so submitting another message immediately does
    # not change the commit count.
    strategy.submit(message)
    strategy.poll()
    assert commit.call_count == 1

    # Commit count immediately increases once we call join()
    strategy.join()
    assert commit.call_count == 2