def test_tick_consumer_min_interval() -> None: clock = TestingClock() broker: Broker[int] = Broker(MemoryMessageStorage(), clock) topic = Topic("messages") broker.create_topic(topic, partitions=2) producer = broker.get_producer() for payload in range(3): producer.produce(Partition(topic, 0), payload).result() clock.sleep(1.0) inner_consumer = broker.get_consumer("group") consumer = TickConsumer(inner_consumer, min_interval=timedelta(seconds=2)) consumer.subscribe([topic]) assert consumer.poll() is None assert consumer.poll() is None message = consumer.poll() assert message is not None tick = message.payload assert tick.offsets.upper - tick.offsets.lower == 2 assert tick.timestamps.upper - tick.timestamps.lower == timedelta(seconds=2)
def subscriptions( *, dataset_name: str, topic: Optional[str], partitions: Optional[int], commit_log_topic: Optional[str], commit_log_groups: Sequence[str], consumer_group: str, auto_offset_reset: str, bootstrap_servers: Sequence[str], max_batch_size: int, max_batch_time_ms: int, max_query_workers: Optional[int], schedule_ttl: int, result_topic: Optional[str], log_level: Optional[str], delay_seconds: Optional[int], ) -> None: """Evaluates subscribed queries for a dataset.""" setup_logging(log_level) setup_sentry() dataset = get_dataset(dataset_name) storage = dataset.get_default_entity().get_writable_storage() assert ( storage is not None ), f"Dataset {dataset_name} does not have a writable storage by default." loader = enforce_table_writer(dataset).get_stream_loader() commit_log_topic_spec = loader.get_commit_log_topic_spec() assert commit_log_topic_spec is not None result_topic_spec = loader.get_subscription_result_topic_spec() assert result_topic_spec is not None metrics = MetricsWrapper( environment.metrics, "subscriptions", tags={ "group": consumer_group, "dataset": dataset_name }, ) consumer = TickConsumer( SynchronizedConsumer( KafkaConsumer( build_kafka_consumer_configuration( loader.get_default_topic_spec().topic, consumer_group, auto_offset_reset=auto_offset_reset, bootstrap_servers=bootstrap_servers, ), ), KafkaConsumer( build_kafka_consumer_configuration( commit_log_topic_spec.topic, f"subscriptions-commit-log-{uuid.uuid1().hex}", auto_offset_reset="earliest", bootstrap_servers=bootstrap_servers, ), ), (Topic(commit_log_topic) if commit_log_topic is not None else Topic(commit_log_topic_spec.topic_name)), set(commit_log_groups), ), time_shift=(timedelta(seconds=delay_seconds * -1) if delay_seconds is not None else None), ) producer = ProducerEncodingWrapper( KafkaProducer( build_kafka_producer_configuration( loader.get_default_topic_spec().topic, bootstrap_servers=bootstrap_servers, override_params={ "partitioner": "consistent", "message.max.bytes": 50000000, # 50MB, default is 1MB }, )), SubscriptionTaskResultEncoder(), ) executor = ThreadPoolExecutor(max_workers=max_query_workers) logger.debug("Starting %r with %s workers...", executor, getattr(executor, "_max_workers", 0)) metrics.gauge("executor.workers", getattr(executor, "_max_workers", 0)) with closing(consumer), executor, closing(producer): from arroyo import configure_metrics configure_metrics(StreamMetricsAdapter(metrics)) batching_consumer = StreamProcessor( consumer, (Topic(topic) if topic is not None else Topic( loader.get_default_topic_spec().topic_name)), BatchProcessingStrategyFactory( SubscriptionWorker( dataset, executor, { index: SubscriptionScheduler( RedisSubscriptionDataStore(redis_client, dataset, PartitionId(index)), PartitionId(index), cache_ttl=timedelta(seconds=schedule_ttl), metrics=metrics, ) for index in range(partitions if partitions is not None else loader. get_default_topic_spec().partitions_number) }, producer, Topic(result_topic) if result_topic is not None else Topic( result_topic_spec.topic_name), metrics, ), max_batch_size, max_batch_time_ms, ), ) def handler(signum: int, frame: Optional[Any]) -> None: batching_consumer.signal_shutdown() signal.signal(signal.SIGINT, handler) signal.signal(signal.SIGTERM, handler) batching_consumer.run()
def subscriptions( *, dataset_name: str, topic: Optional[str], partitions: Optional[int], commit_log_topic: Optional[str], commit_log_groups: Sequence[str], consumer_group: str, auto_offset_reset: str, bootstrap_servers: Sequence[str], max_batch_size: int, max_batch_time_ms: int, schedule_ttl: int, result_topic: Optional[str], log_level: Optional[str], ) -> None: """Evaluates subscribed queries for a dataset.""" assert result_topic is not None setup_logging(log_level) setup_sentry() dataset = get_dataset(dataset_name) if not bootstrap_servers: bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get( dataset_name, settings.DEFAULT_BROKERS ) loader = enforce_table_writer(dataset).get_stream_loader() consumer = TickConsumer( SynchronizedConsumer( KafkaConsumer( build_kafka_consumer_configuration( bootstrap_servers, consumer_group, auto_offset_reset=auto_offset_reset, ), PassthroughCodec(), ), KafkaConsumer( build_kafka_consumer_configuration( bootstrap_servers, f"subscriptions-commit-log-{uuid.uuid1().hex}", auto_offset_reset="earliest", ), CommitCodec(), ), ( Topic(commit_log_topic) if commit_log_topic is not None else Topic(loader.get_commit_log_topic_spec().topic_name) ), set(commit_log_groups), ) ) producer = KafkaProducer( { "bootstrap.servers": ",".join(bootstrap_servers), "partitioner": "consistent", "message.max.bytes": 50000000, # 50MB, default is 1MB }, SubscriptionResultCodec(), ) with closing(consumer), closing(producer): batching_consumer = BatchingConsumer( consumer, ( Topic(topic) if topic is not None else Topic(loader.get_default_topic_spec().topic_name) ), SubscriptionWorker( SubscriptionExecutor( dataset, ThreadPoolExecutor( max_workers=settings.SUBSCRIPTIONS_MAX_CONCURRENT_QUERIES ), ), { index: SubscriptionScheduler( RedisSubscriptionDataStore( redis_client, dataset, PartitionId(index) ), PartitionId(index), cache_ttl=timedelta(seconds=schedule_ttl), ) for index in range( partitions if partitions is not None else loader.get_default_topic_spec().partitions_number ) }, producer, Topic(result_topic), ), max_batch_size, max_batch_time_ms, create_metrics( "snuba.subscriptions", tags={"group": consumer_group, "dataset": dataset_name}, ), ) def handler(signum, frame) -> None: batching_consumer.signal_shutdown() signal.signal(signal.SIGINT, handler) signal.signal(signal.SIGTERM, handler) batching_consumer.run()
def test_tick_consumer(clock: Clock, broker: Broker[int], time_shift: Optional[timedelta]) -> None: epoch = datetime.fromtimestamp(clock.time()) topic = Topic("messages") broker.create_topic(topic, partitions=2) producer = broker.get_producer() for partition, payloads in enumerate([[0, 1, 2], [0]]): for payload in payloads: producer.produce(Partition(topic, partition), payload).result() inner_consumer = broker.get_consumer("group") consumer = TickConsumer(inner_consumer, time_shift=time_shift) if time_shift is None: time_shift = timedelta() def assignment_callback(offsets: Mapping[Partition, int]) -> None: assignment_callback.called = True assert consumer.tell() == { Partition(topic, 0): 0, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 0, Partition(topic, 1): 0, } assignment_callback.called = False consumer.subscribe([topic], on_assign=assignment_callback) with assert_changes(lambda: assignment_callback.called, False, True): # consume 0, 0 assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 0, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } # consume 0, 1 assert consumer.poll() == Message( Partition(topic, 0), 0, Tick(offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)).time_shift(time_shift), epoch, ) assert consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } # consume 0, 2 assert consumer.poll() == Message( Partition(topic, 0), 1, Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)).time_shift(time_shift), epoch, ) assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 0, } # consume 1, 0 assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 1, } # consume no message assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 1, } consumer.seek({Partition(topic, 0): 1}) assert consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 1, } # consume 0, 1 assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 1, } # consume 0, 2 assert consumer.poll() == Message( Partition(topic, 0), 1, Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)).time_shift(time_shift), epoch, ) assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 1, } with pytest.raises(ConsumerError): consumer.seek({Partition(topic, -1): 0})
def test_tick_consumer_non_monotonic(clock: Clock, broker: Broker[int]) -> None: epoch = datetime.fromtimestamp(clock.time()) topic = Topic("messages") partition = Partition(topic, 0) broker.create_topic(topic, partitions=1) producer = broker.get_producer() inner_consumer = broker.get_consumer("group") consumer = TickConsumer(inner_consumer) def assignment_callback(offsets: Mapping[Partition, int]) -> None: assignment_callback.called = True assert inner_consumer.tell() == {partition: 0} assert consumer.tell() == {partition: 0} assignment_callback.called = False consumer.subscribe([topic], on_assign=assignment_callback) producer.produce(partition, 0) clock.sleep(1) producer.produce(partition, 1) with assert_changes(lambda: assignment_callback.called, False, True): assert consumer.poll() is None assert inner_consumer.tell() == {partition: 1} assert consumer.tell() == {partition: 0} with assert_changes(inner_consumer.tell, {partition: 1}, {partition: 2}), assert_changes( consumer.tell, {partition: 0}, {partition: 1}): assert consumer.poll() == Message( partition, 0, Tick( offsets=Interval(0, 1), timestamps=Interval(epoch, epoch + timedelta(seconds=1)), ), epoch + timedelta(seconds=1), ) clock.sleep(-1) producer.produce(partition, 2) with assert_changes(inner_consumer.tell, {partition: 2}, {partition: 3}), assert_does_not_change( consumer.tell, {partition: 1}): assert consumer.poll() is None clock.sleep(2) producer.produce(partition, 3) with assert_changes(inner_consumer.tell, {partition: 3}, {partition: 4}), assert_changes( consumer.tell, {partition: 1}, {partition: 3}): assert consumer.poll() == Message( partition, 1, Tick( offsets=Interval(1, 3), timestamps=Interval(epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)), ), epoch + timedelta(seconds=2), )
def test_tick_consumer() -> None: topic = Topic("messages") broker: DummyBroker[int] = DummyBroker() broker.create_topic(topic, partitions=2) producer: DummyProducer[int] = DummyProducer(broker) for partition, payloads in enumerate([[0, 1, 2], [0]]): for payload in payloads: producer.produce(Partition(topic, partition), payload).result() inner_consumer: Consumer[int] = DummyConsumer(broker, "group") consumer = TickConsumer(inner_consumer) consumer.subscribe([topic]) assert consumer.tell() == { Partition(topic, 0): 0, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 0, Partition(topic, 1): 0, } # consume 0, 0 assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 0, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } # consume 0, 1 assert consumer.poll() == Message( Partition(topic, 0), 0, Tick(offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)), epoch, ) assert consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } # consume 0, 2 assert consumer.poll() == Message( Partition(topic, 0), 1, Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)), epoch, ) assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 0, } # consume 1, 0 assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 1, } # consume no message assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 1, } consumer.seek({Partition(topic, 0): 1}) assert consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 1, } # consume 0, 1 assert consumer.poll() is None assert consumer.tell() == { Partition(topic, 0): 1, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 1, } # consume 0, 2 assert consumer.poll() == Message( Partition(topic, 0), 1, Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)), epoch, ) assert consumer.tell() == { Partition(topic, 0): 2, Partition(topic, 1): 0, } assert inner_consumer.tell() == { Partition(topic, 0): 3, Partition(topic, 1): 1, } with pytest.raises(ConsumerError): consumer.seek({Partition(topic, -1): 0})
def test_tick_consumer_non_monotonic() -> None: topic = Topic("messages") partition = Partition(topic, 0) clock = TestingClock(epoch.timestamp()) broker: DummyBroker[int] = DummyBroker(clock) broker.create_topic(topic, partitions=1) producer: DummyProducer[int] = DummyProducer(broker) inner_consumer: Consumer[int] = DummyConsumer(broker, "group") consumer = TickConsumer(inner_consumer) consumer.subscribe([topic]) producer.produce(partition, 0) clock.sleep(1) producer.produce(partition, 1) with assert_changes(inner_consumer.tell, {partition: 0}, {partition: 1}), assert_does_not_change( consumer.tell, {partition: 0}): assert consumer.poll() is None with assert_changes(inner_consumer.tell, {partition: 1}, {partition: 2}), assert_changes( consumer.tell, {partition: 0}, {partition: 1}): assert consumer.poll() == Message( partition, 0, Tick( offsets=Interval(0, 1), timestamps=Interval(epoch, epoch + timedelta(seconds=1)), ), epoch + timedelta(seconds=1), ) clock.sleep(-1) producer.produce(partition, 2) with assert_changes(inner_consumer.tell, {partition: 2}, {partition: 3}), assert_does_not_change( consumer.tell, {partition: 1}): assert consumer.poll() is None clock.sleep(2) producer.produce(partition, 3) with assert_changes(inner_consumer.tell, {partition: 3}, {partition: 4}), assert_changes( consumer.tell, {partition: 1}, {partition: 3}): assert consumer.poll() == Message( partition, 1, Tick( offsets=Interval(1, 3), timestamps=Interval(epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)), ), epoch + timedelta(seconds=2), )