def test_encoding_producer() -> None:
    """Check that ``ProducerEncodingWrapper`` encodes payloads when producing.

    The message returned by the wrapped producer carries the original
    payload, the message read back by a plain consumer carries the encoded
    payload, and all remaining message attributes are equal.
    """

    class ReverseEncoder(Encoder[str, str]):
        """Encoder whose output is simply the input string reversed."""

        def encode(self, value: str) -> str:
            return value[::-1]

    topic = Topic("test")
    broker: Broker[str] = Broker(MemoryMessageStorage(), TestingClock())
    broker.create_topic(topic, 1)

    wrapped_producer = ProducerEncodingWrapper(
        broker.get_producer(), ReverseEncoder()
    )
    decoded_message = wrapped_producer.produce(topic, "hello").result()
    assert decoded_message.payload == "hello"

    consumer = broker.get_consumer("group")
    consumer.subscribe([topic])
    encoded_message = consumer.poll()
    assert encoded_message is not None

    # The consumer is not wrapped, so the payload it returns is still encoded.
    assert encoded_message.payload == "olleh"

    # Every attribute other than the payload must match between the two.
    shared_attributes = set(Message.__slots__) - {"payload"}
    for attribute in shared_attributes:
        consumed_value = getattr(encoded_message, attribute)
        produced_value = getattr(decoded_message, attribute)
        assert consumed_value == produced_value
def test_tick_consumer_min_interval() -> None:
    """Check that ``TickConsumer`` honours its ``min_interval`` argument.

    Three messages are produced with the test clock advanced by one second
    after each. With a two second minimum interval the first two polls
    return nothing and the third returns one tick covering two offsets and
    two seconds.
    """
    clock = TestingClock()
    broker: Broker[int] = Broker(MemoryMessageStorage(), clock)
    topic = Topic("messages")
    broker.create_topic(topic, partitions=2)

    # Everything is written to partition 0, one second apart on the test clock.
    producer = broker.get_producer()
    for payload in range(3):
        future = producer.produce(Partition(topic, 0), payload)
        future.result()
        clock.sleep(1.0)

    consumer = TickConsumer(
        broker.get_consumer("group"),
        min_interval=timedelta(seconds=2),
    )
    consumer.subscribe([topic])

    for _ in range(2):
        assert consumer.poll() is None

    message = consumer.poll()
    assert message is not None

    offsets = message.payload.offsets
    timestamps = message.payload.timestamps
    assert offsets.upper - offsets.lower == 2
    assert timestamps.upper - timestamps.lower == timedelta(seconds=2)
def test_subscription_worker_consistent(
    subscription_data: SubscriptionData,
) -> None:
    """Check that processing one tick records exactly one ``consistent`` metric.

    The ``event_subscription_non_consistent_sample_rate`` runtime config is
    set to 1 before a tick covering a single evaluation is handed to the
    worker.
    """
    state.set_config("event_subscription_non_consistent_sample_rate", 1)

    result_topic = Topic("subscription-results")
    broker: Broker[SubscriptionTaskResult] = Broker(
        MemoryMessageStorage(), TestingClock()
    )
    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 1

    identifier = SubscriptionIdentifier(PartitionId(0), uuid1())
    subscription = Subscription(identifier, subscription_data)

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    # The worker gets a recording metrics backend so calls can be inspected;
    # the scheduler gets its own strict dummy backend.
    metrics = TestingMetricsBackend()
    dataset = get_dataset("events")
    schedulers = {
        0: SubscriptionScheduler(
            store, PartitionId(0), timedelta(), DummyMetricsBackend(strict=True)
        )
    }
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        schedulers,
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)
    window_start = now - (frequency * evaluations)
    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(window_start, now),
    )

    worker.process_message(Message(Partition(Topic("events"), 0), 0, tick, now))

    # NOTE(review): presumably this pause lets work submitted to the thread
    # pool finish before the metrics are inspected -- confirm.
    time.sleep(0.1)

    consistent_increments = [
        recorded
        for recorded in metrics.calls
        if isinstance(recorded, Increment) and recorded.name == "consistent"
    ]
    assert len(consistent_increments) == 1
def test_execute_and_produce_result() -> None:
    """Check that ``ExecuteQuery`` feeds ``ProduceResult`` end to end.

    A single scheduled-subscription message is submitted to the strategy,
    which is then polled until a result message shows up on the result
    topic and the commit callback has been invoked. The produced message
    must be keyed by the subscription identifier and the commit callback
    must have been called exactly once.
    """
    # Local import: only needed for the polling deadline below.
    import time

    state.set_config("subscription_mode_events", "new")

    dataset = get_dataset("events")
    entity_names = ["events"]
    max_concurrent_queries = 2
    total_concurrent_queries = 2
    metrics = TestingMetricsBackend()

    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    clock = TestingClock()
    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, clock)
    broker.create_topic(scheduled_topic, partitions=1)
    broker.create_topic(result_topic, partitions=1)
    producer = broker.get_producer()

    commit = mock.Mock()

    strategy = ExecuteQuery(
        dataset,
        entity_names,
        max_concurrent_queries,
        total_concurrent_queries,
        None,  # no stale threshold for this test
        metrics,
        ProduceResult(producer, result_topic.name, commit),
        commit,
    )

    subscription_identifier = SubscriptionIdentifier(PartitionId(0), uuid.uuid1())

    make_message = generate_message(EntityKey.EVENTS, subscription_identifier)
    message = next(make_message)
    strategy.submit(message)

    # Eventually a message should be produced and offsets committed. The wait
    # is bounded so that a regression fails the test instead of hanging the
    # whole test run in an endless polling loop.
    timeout_seconds = 10.0
    deadline = time.monotonic() + timeout_seconds
    while (
        broker_storage.consume(Partition(result_topic, 0), 0) is None
        or commit.call_count == 0
    ):
        assert (
            time.monotonic() < deadline
        ), "timed out waiting for a result to be produced and committed"
        strategy.poll()

    produced_message = broker_storage.consume(Partition(result_topic, 0), 0)
    assert produced_message is not None
    assert produced_message.payload.key == str(subscription_identifier).encode(
        "utf-8"
    )
    assert commit.call_count == 1
def test_skip_stale_message() -> None:
    """Check that ``ExecuteQuery`` skips a message when a stale threshold is set.

    With a 60 second stale threshold, submitting the generated message and
    polling once must leave the result topic empty and record a
    ``skipped_execution`` increment tagged with the ``events`` entity.
    """
    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")

    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, TestingClock())
    for topic in (scheduled_topic, result_topic):
        broker.create_topic(topic, partitions=1)

    metrics = TestingMetricsBackend()
    commit = mock.Mock()
    next_step = ProduceResult(broker.get_producer(), result_topic.name, commit)

    stale_threshold_seconds = 60
    max_concurrent_queries = 2
    total_concurrent_queries = 2
    strategy = ExecuteQuery(
        get_dataset("events"),
        ["events"],
        max_concurrent_queries,
        total_concurrent_queries,
        stale_threshold_seconds,
        metrics,
        next_step,
        commit,
    )

    subscription_identifier = SubscriptionIdentifier(PartitionId(0), uuid.uuid1())
    message_source = generate_message(EntityKey.EVENTS, subscription_identifier)
    strategy.submit(next(message_source))

    # A single poll is enough here: no message is expected to be produced.
    strategy.poll()

    assert broker_storage.consume(Partition(result_topic, 0), 0) is None
    expected_metric = Increment("skipped_execution", 1, {"entity": "events"})
    assert expected_metric in metrics.calls
def test_invalid_commit_log_message(caplog: Any) -> None:
    """Check that an invalid commit log payload is logged rather than yielded.

    A payload with a ``None`` key is written to the commit log topic. Polling
    the ``CommitLogTickConsumer`` must return ``None`` and the captured log
    text must mention the followed consumer group.
    """
    followed_consumer_group = "events"
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)
    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")
    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    start_positions = {partition: 0}

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        # At assignment time both consumers must sit at the start of the log.
        assert inner_consumer.tell() == start_positions
        assert consumer.tell() == start_positions

    assignment_callback = mock.Mock(side_effect=_assignment_callback)
    consumer.subscribe([topic], on_assign=assignment_callback)

    # Produce an invalid payload to the commit log topic (the key should not
    # be None).
    invalid_payload = KafkaPayload(None, b"some-value", [])
    producer.produce(partition, invalid_payload).result()

    clock.sleep(1)

    with caplog.at_level(logging.ERROR):
        assert consumer.poll() is None
        assert followed_consumer_group in caplog.text
def test_tick_consumer_non_monotonic() -> None:
    """Check ``CommitLogTickConsumer`` when commit timestamps go backwards.

    Four commits are written for the followed group at timestamps epoch,
    epoch+1s, epoch (a step backwards) and epoch+2s. The backwards commit
    must not yield a tick; the following tick covers offsets 1..3 and
    timestamps epoch+1s..epoch+2s.

    NOTE(review): another test with this exact name appears later in this
    view. If both live in the same module, the later definition shadows
    this one and this test is never collected -- confirm.
    """
    followed_consumer_group = "events"
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)
    epoch = datetime.fromtimestamp(clock.time())
    one_second_in = epoch + timedelta(seconds=1)
    two_seconds_in = epoch + timedelta(seconds=2)

    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")
    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    def produce_commit(offset: int, timestamp: datetime) -> None:
        # Write one encoded commit for the followed group and wait for it.
        commit = Commit(followed_consumer_group, partition, offset, timestamp)
        producer.produce(partition, commit_codec.encode(commit)).result()

    assignment_callback = mock.Mock(side_effect=_assignment_callback)
    consumer.subscribe([topic], on_assign=assignment_callback)

    produce_commit(0, epoch)
    clock.sleep(1)
    produce_commit(1, one_second_in)

    # The first commit only establishes a baseline; no tick is produced yet.
    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert consumer.tell() == {partition: 1}

    with assert_changes(consumer.tell, {partition: 1}, {partition: 2}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                0,
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, one_second_in),
            ),
            one_second_in,
        )

    # Move the clock backwards and commit with an earlier timestamp.
    clock.sleep(-1)
    produce_commit(2, epoch)

    with assert_changes(consumer.tell, {partition: 2}, {partition: 3}):
        assert consumer.poll() is None

    clock.sleep(2)
    produce_commit(3, two_seconds_in)

    with assert_changes(consumer.tell, {partition: 3}, {partition: 4}):
        assert consumer.poll() == Message(
            partition,
            3,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(one_second_in, two_seconds_in),
            ),
            two_seconds_in,
        )
def test_tick_consumer(time_shift: Optional[timedelta]) -> None:
    """Walk ``CommitLogTickConsumer`` through polling, seeking and bad seeks.

    Commits for two followed partitions (offsets 0, 1, 2 on the first and 0
    on the second) are all written to partition 0 of the commit log topic.
    The test then checks the tick or ``None`` returned by each poll, the
    position reported by ``tell`` after each step, the behaviour after
    seeking back to offset 1, and that seeking to an unknown partition
    raises ``ConsumerError``.

    Args:
        time_shift: Optional shift passed to the consumer; expected ticks
            are shifted by the same amount (``None`` is treated as zero).
    """
    followed_consumer_group = "events"
    topic = Topic("messages")
    commit_log_partition = Partition(topic, 0)

    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)
    epoch = datetime.fromtimestamp(clock.time())

    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    committed_offsets = [[0, 1, 2], [0]]
    for partition_index, offsets in enumerate(committed_offsets):
        for offset in offsets:
            commit = Commit(
                followed_consumer_group,
                Partition(topic, partition_index),
                offset,
                epoch,
            )
            producer.produce(
                Partition(topic, 0), commit_codec.encode(commit)
            ).result()

    inner_consumer = broker.get_consumer("group")
    consumer = CommitLogTickConsumer(
        inner_consumer, followed_consumer_group, time_shift=time_shift
    )

    # The consumer received the raw argument above; from here on the value
    # is only used to build expected ticks, so normalise ``None`` to zero.
    if time_shift is None:
        time_shift = timedelta()

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert consumer.tell() == {commit_log_partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)
    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {commit_log_partition: 1}

    # consume 0, 1
    first_tick = Tick(
        0, offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)
    ).time_shift(time_shift)
    assert consumer.poll() == Message(commit_log_partition, 1, first_tick, epoch)
    assert consumer.tell() == {commit_log_partition: 2}

    # consume 0, 2
    second_tick = Tick(
        0, offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)
    ).time_shift(time_shift)
    assert consumer.poll() == Message(commit_log_partition, 2, second_tick, epoch)
    assert consumer.tell() == {commit_log_partition: 3}

    # consume 1, 0
    assert consumer.poll() is None
    assert consumer.tell() == {commit_log_partition: 4}

    # consume no message
    assert consumer.poll() is None
    assert consumer.tell() == {commit_log_partition: 4}

    consumer.seek({commit_log_partition: 1})
    assert consumer.tell() == {commit_log_partition: 1}

    # consume 0, 1
    assert consumer.poll() is None
    assert consumer.tell() == {commit_log_partition: 2}

    # consume 0, 2
    replayed_tick = Tick(
        0, offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)
    ).time_shift(time_shift)
    assert consumer.poll() == Message(commit_log_partition, 2, replayed_tick, epoch)
    assert consumer.tell() == {commit_log_partition: 3}

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
def test_produce_step() -> None:
    """Exercise ``ProduceStep`` through submit, poll, close and join.

    A batch of three messages (counter, distribution and set payloads) is
    wrapped in one outer message and submitted. The test checks that three
    futures were created, that three messages with the expected payloads
    reached the broker, and that polling commits a position for every
    message in the batch.
    """
    topic = Topic("snuba-metrics")
    partition = Partition(topic, 0)

    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, Clock())
    broker.create_topic(topic, partitions=1)

    commit = Mock()
    produce_step = ProduceStep(
        commit_function=commit, producer=broker.get_producer()
    )

    message_payloads = [counter_payload, distribution_payload, set_payload]
    # Offsets in the batch start at 1.
    message_batch = [
        Message(
            Partition(Topic("topic"), 0),
            offset,
            KafkaPayload(
                None,
                json.dumps(__translated_payload(payload)).encode("utf-8"),
                [],
            ),
            datetime.now(),
        )
        for offset, payload in enumerate(message_payloads, start=1)
    ]

    # The outer message uses the last message's partition, offset, and
    # timestamp.
    last = message_batch[-1]
    outer_message = Message(
        last.partition, last.offset, message_batch, last.timestamp
    )

    # 1. Submit the message (that would have been generated from
    #    process_messages).
    produce_step.submit(outer_message=outer_message)

    # 2. Check that submit created the same number of futures as messages in
    #    the outer_message (3 in this test). Also check that the produced
    #    message payloads are as expected.
    assert len(produce_step._ProduceStep__futures) == 3

    first_message = broker_storage.consume(partition, 0)
    assert first_message is not None
    second_message = broker_storage.consume(partition, 1)
    assert second_message is not None
    third_message = broker_storage.consume(partition, 2)
    assert third_message is not None
    assert broker_storage.consume(partition, 3) is None

    produced_messages = [
        json.loads(produced.payload.value.decode("utf-8"), use_rapid_json=True)
        for produced in (first_message, second_message, third_message)
    ]

    expected_produced_messages = []
    for payload in message_payloads:
        translated = __translated_payload(payload)
        # Tag keys are compared as strings on the produced side.
        tags: Mapping[str, int] = {
            str(key): value for key, value in translated["tags"].items()
        }
        translated.update(tags=tags)
        expected_produced_messages.append(translated)

    assert produced_messages == expected_produced_messages

    # 3. Call the poll method, and check that doing so found the futures
    #    ready and successful, so that the messages were committed.
    produce_step.poll()
    expected_commit_calls = [
        call({batched.partition: Position(batched.offset, batched.timestamp)})
        for batched in message_batch
    ]
    assert commit.call_args_list == expected_commit_calls

    produce_step.close()
    produce_step.join()
def test_tick_consumer_non_monotonic() -> None:
    """Check ``TickConsumer`` when message timestamps go backwards.

    Messages are produced at clock times epoch, epoch+1s, epoch (after
    stepping the clock back) and epoch+2s. The message with the earlier
    timestamp advances the inner consumer but yields no tick and leaves the
    tick consumer's position unchanged; the following tick covers offsets
    1..3 and timestamps epoch+1s..epoch+2s.

    NOTE(review): another test with this exact name appears earlier in this
    view. If both live in the same module, this definition shadows the
    earlier one -- confirm.
    """
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock()
    broker: Broker[int] = Broker(MemoryMessageStorage(), clock)
    epoch = datetime.fromtimestamp(clock.time())
    one_second_in = epoch + timedelta(seconds=1)
    two_seconds_in = epoch + timedelta(seconds=2)

    broker.create_topic(topic, partitions=1)
    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")
    consumer = TickConsumer(inner_consumer)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)
    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(partition, 0)
    clock.sleep(1)
    producer.produce(partition, 1)

    # The first message only establishes a baseline; no tick is produced.
    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert inner_consumer.tell() == {partition: 1}
    assert consumer.tell() == {partition: 0}

    first_tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(epoch, one_second_in),
    )
    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(partition, 0, first_tick, one_second_in)

    # Step the clock back so the next message carries an earlier timestamp.
    clock.sleep(-1)
    producer.produce(partition, 2)

    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)
    producer.produce(partition, 3)

    second_tick = Tick(
        offsets=Interval(1, 3),
        timestamps=Interval(one_second_in, two_seconds_in),
    )
    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(partition, 1, second_tick, two_seconds_in)
def test_subscription_worker(subscription_data: SubscriptionData) -> None:
    """Check that ``SubscriptionWorker`` evaluates and publishes every task.

    A tick spanning three one-minute evaluations is processed. The test
    checks that three result futures come back, that flushing publishes one
    result message per evaluation to the result topic, and that each
    request carries the expected timestamp conditions and an empty count
    result.
    """
    result_topic = Topic("subscription-results")
    broker: Broker[SubscriptionTaskResult] = Broker(
        MemoryMessageStorage(), TestingClock()
    )
    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    identifier = SubscriptionIdentifier(PartitionId(0), uuid1())
    subscription = Subscription(identifier, subscription_data)

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)
    dataset = get_dataset("events")
    schedulers = {
        0: SubscriptionScheduler(store, PartitionId(0), timedelta(), metrics)
    }
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        schedulers,
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)
    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    tick_message = Message(Partition(Topic("events"), 0), 0, tick, now)
    result_futures = worker.process_message(tick_message)
    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    expected_result = {
        "meta": [{"name": "count", "type": "UInt64"}],
        "data": [{"count": 0}],
    }

    for index, (task, future) in enumerate(result_futures):
        timestamp = now - frequency * (evaluations - index)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        future_result = future.result()
        request, result = future_result
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        timestamp_column = Column(None, String("timestamp"))
        from_pattern = FunctionCall(
            String(ConditionFunctions.GTE),
            (
                timestamp_column,
                Literal(Datetime(timestamp - subscription.data.time_window)),
            ),
        )
        to_pattern = FunctionCall(
            String(ConditionFunctions.LT),
            (Column(None, String("timestamp")), Literal(Datetime(timestamp))),
        )

        condition = request.query.get_condition()
        assert condition is not None

        conditions = get_first_level_and_conditions(condition)
        from_matches = [from_pattern.match(e) for e in conditions]
        to_matches = [to_pattern.match(e) for e in conditions]
        assert any(from_matches)
        assert any(to_matches)

        assert result == expected_result
def test_produce_result() -> None:
    """Check producing and commit throttling in the ``ProduceResult`` strategy.

    Submitting a task result must write one message, keyed by the
    subscription identifier, to the result topic. The commit callback is
    not called on submit, is called once on the first poll, is not called
    again by an immediate second submit and poll, and is called once more
    by ``join``.
    """
    state.set_config("subscription_mode_events", "new")

    epoch = datetime(1970, 1, 1)
    scheduled_topic = Topic("scheduled-subscriptions-events")
    result_topic = Topic("events-subscriptions-results")
    result_partition = Partition(result_topic, 0)

    broker_storage: MemoryMessageStorage[KafkaPayload] = MemoryMessageStorage()
    broker: Broker[KafkaPayload] = Broker(broker_storage, TestingClock())
    for topic in (scheduled_topic, result_topic):
        broker.create_topic(topic, partitions=1)

    commit = mock.Mock()
    strategy = ProduceResult(broker.get_producer(), result_topic.name, commit)

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )
    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid.uuid1()), subscription_data
    )

    request = subscription_data.build_request(
        get_dataset("events"), epoch, None, Timer("timer")
    )
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    task = ScheduledSubscriptionTask(
        epoch,
        SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 1),
    )
    message = Message(
        Partition(scheduled_topic, 0),
        1,
        SubscriptionTaskResult(task, (request, result)),
        epoch,
    )

    strategy.submit(message)

    # Exactly one message, keyed by the subscription identifier, was produced.
    produced_message = broker_storage.consume(result_partition, 0)
    assert produced_message is not None
    expected_key = str(subscription.identifier).encode("utf-8")
    assert produced_message.payload.key == expected_key
    assert broker_storage.consume(result_partition, 1) is None

    assert commit.call_count == 0
    strategy.poll()
    assert commit.call_count == 1

    # Commit is throttled so if we immediately submit another message, the
    # commit count will not change.
    strategy.submit(message)
    strategy.poll()
    assert commit.call_count == 1

    # Commit count immediately increases once we call join().
    strategy.join()
    assert commit.call_count == 2