def test_scheduler_consumer() -> None:
    """
    Drive the scheduler consumer with commit log messages for two partitions.

    A single subscription is stored for partition 0 of the ``events`` entity,
    four commits (two per partition) are published to the commit log topic,
    and the test then checks that ``produce`` was called exactly twice on the
    mocked scheduler producer.

    All cleanup (closing the Kafka producer, shutting the scheduler down and
    resetting ``settings.TOPIC_PARTITION_COUNTS``) runs in ``finally`` blocks
    so that a failing step does not leak state into other tests.
    """
    settings.TOPIC_PARTITION_COUNTS = {"events": 2}
    try:
        # Reload after changing the setting; presumably the module reads the
        # partition counts at import time -- TODO confirm.
        importlib.reload(scheduler_consumer)

        admin_client = AdminClient(get_default_kafka_configuration())
        create_topics(admin_client, [SnubaTopic.COMMIT_LOG])

        metrics_backend = TestingMetricsBackend()
        entity_name = "events"
        entity_key = EntityKey(entity_name)
        entity = get_entity(entity_key)
        storage = entity.get_writable_storage()
        assert storage is not None
        stream_loader = storage.get_table_writer().get_stream_loader()

        commit_log_topic = Topic("snuba-commit-log")

        mock_scheduler_producer = mock.Mock()

        from snuba.redis import redis_client
        from snuba.subscriptions.data import PartitionId, SubscriptionData
        from snuba.subscriptions.entity_subscription import EventsSubscription
        from snuba.subscriptions.store import RedisSubscriptionDataStore

        # Register one subscription on partition 0 for the scheduler to find.
        partition_index = 0
        store = RedisSubscriptionDataStore(
            redis_client, entity_key, PartitionId(partition_index)
        )
        store.create(
            uuid.uuid4(),
            SubscriptionData(
                project_id=1,
                time_window_sec=60,
                resolution_sec=60,
                query="MATCH events SELECT count()",
                entity_subscription=EventsSubscription(data_dict={}),
            ),
        )

        builder = scheduler_consumer.SchedulerBuilder(
            entity_name,
            str(uuid.uuid1().hex),
            "events",
            mock_scheduler_producer,
            "latest",
            False,
            60 * 5,
            None,
            None,
            metrics_backend,
        )
        scheduler = builder.build_consumer()
        try:
            time.sleep(2)
            for _ in range(3):
                scheduler._run_once()

            epoch = datetime(1970, 1, 1)

            # (partition, offset, original message timestamp) for each commit.
            commits = [
                (0, 0, epoch),
                (1, 0, epoch + timedelta(minutes=1)),
                (0, 1, epoch + timedelta(minutes=2)),
                (1, 1, epoch + timedelta(minutes=3)),
            ]

            producer = KafkaProducer(
                build_kafka_producer_configuration(
                    stream_loader.get_default_topic_spec().topic,
                )
            )
            try:
                for partition, offset, orig_message_ts in commits:
                    fut = producer.produce(
                        commit_log_topic,
                        payload=commit_codec.encode(
                            Commit(
                                "events",
                                Partition(commit_log_topic, partition),
                                offset,
                                orig_message_ts,
                            )
                        ),
                    )
                    # Wait for each commit before sending the next one.
                    fut.result()
            finally:
                producer.close()

            for _ in range(5):
                scheduler._run_once()
        finally:
            scheduler._shutdown()

        assert mock_scheduler_producer.produce.call_count == 2
    finally:
        # Always undo the global settings override, even when the test fails.
        settings.TOPIC_PARTITION_COUNTS = {}
def closing(producer: KafkaProducer) -> Iterator[Optional[KafkaProducer]]:
    """
    Yield ``producer`` once and close it when the generator is finalized.

    The ``finally`` clause calls ``producer.close()`` and then ``.result()``
    on the object that ``close()`` returned. This happens whether the
    generator runs to completion, is closed, or has an exception thrown
    into it.

    NOTE(review): as written this is a plain generator function; using it in
    a ``with`` statement would require wrapping it with
    ``contextlib.contextmanager`` -- confirm against the call sites.
    """
    try:
        yield producer
    finally:
        pending_close = producer.close()
        pending_close.result()
def test_executor_consumer() -> None:
    """
    End to end integration test.

    Builds an executor consumer for the ``events`` entity, publishes one
    encoded ``ScheduledSubscriptionTask`` to the scheduled subscriptions
    topic, and checks that a message carrying the expected subscription id
    can be polled from the subscription results topic.

    The Kafka producers and the result consumer are closed in ``finally``
    blocks so they are released even when an assertion fails.
    """
    state.set_config("subscription_mode_events", "new")
    admin_client = AdminClient(get_default_kafka_configuration())
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_SCHEDULED_EVENTS])
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS])

    dataset_name = "events"
    entity_name = "events"
    entity_key = EntityKey(entity_name)
    entity = get_entity(entity_key)
    storage = entity.get_writable_storage()
    assert storage is not None
    stream_loader = storage.get_table_writer().get_stream_loader()

    scheduled_result_topic_spec = stream_loader.get_subscription_result_topic_spec()
    assert scheduled_result_topic_spec is not None

    result_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_result_topic_spec.topic)
    )
    try:
        result_consumer = KafkaConsumer(
            build_kafka_consumer_configuration(
                scheduled_result_topic_spec.topic,
                str(uuid.uuid1().hex),
                auto_offset_reset="latest",
                strict_offset_reset=False,
            )
        )
        try:
            assigned = False

            def on_partitions_assigned(partitions: Mapping[Partition, int]) -> None:
                nonlocal assigned
                assigned = True

            result_consumer.subscribe(
                [Topic(scheduled_result_topic_spec.topic_name)],
                on_assign=on_partitions_assigned,
            )

            # We need to wait for the consumer to receive partitions otherwise,
            # when we try to consume messages, we will not find anything.
            # Subscription is an async process.
            attempts = 10
            while attempts > 0 and not assigned:
                result_consumer.poll(1.0)
                attempts -= 1

            assert assigned, "Did not receive assignment within 10 attempts"

            consumer_group = str(uuid.uuid1().hex)
            auto_offset_reset = "latest"
            strict_offset_reset = False
            executor = build_executor_consumer(
                dataset_name,
                [entity_name],
                consumer_group,
                result_producer,
                2,
                2,
                auto_offset_reset,
                strict_offset_reset,
                TestingMetricsBackend(),
                None,
            )
            for _ in range(4):
                # Give time to the executor to subscribe
                time.sleep(1)
                executor._run_once()

            # Produce a scheduled task to the scheduled subscriptions topic
            subscription_data = SubscriptionData(
                project_id=1,
                query="MATCH (events) SELECT count()",
                time_window_sec=60,
                resolution_sec=60,
                entity_subscription=EventsSubscription(data_dict={}),
            )

            task = ScheduledSubscriptionTask(
                timestamp=datetime(1970, 1, 1),
                task=SubscriptionWithMetadata(
                    entity_key,
                    Subscription(
                        SubscriptionIdentifier(
                            PartitionId(1),
                            uuid.UUID("91b46cb6224f11ecb2ddacde48001122"),
                        ),
                        subscription_data,
                    ),
                    1,
                ),
            )

            encoder = SubscriptionScheduledTaskEncoder()
            encoded_task = encoder.encode(task)

            scheduled_topic_spec = stream_loader.get_subscription_scheduled_topic_spec()
            assert scheduled_topic_spec is not None
            tasks_producer = KafkaProducer(
                build_kafka_producer_configuration(scheduled_topic_spec.topic)
            )
            try:
                scheduled_topic = Topic(scheduled_topic_spec.topic_name)
                tasks_producer.produce(scheduled_topic, payload=encoded_task).result()
            finally:
                tasks_producer.close()

            executor._run_once()
            executor.signal_shutdown()
            # Call run here so that the executor shuts down itself cleanly.
            executor.run()

            result = result_consumer.poll(5)
            assert result is not None, "Did not receive a result message"
            data = json.loads(result.payload.value)
            assert (
                data["payload"]["subscription_id"]
                == "1/91b46cb6224f11ecb2ddacde48001122"
            ), "Invalid subscription id"
        finally:
            result_consumer.close()
    finally:
        result_producer.close()