def test_commit_log_consumer() -> None:
    """
    Committing staged positions through ``KafkaConsumerWithCommitLog`` should
    hand exactly one record to the commit log producer, and that record should
    decode back to the committed group, partition, offset and timestamp.
    """
    # XXX: An integration test (or at least a test written against an abstract
    # Producer interface) would be preferable to testing against a fake.
    fake_commit_log_producer = FakeConfluentKafkaProducer()
    kafka_config = get_default_kafka_configuration()

    data_topic = Topic("topic")
    expected_partition = Partition(data_topic, 0)

    commit_log_consumer: KafkaConsumer = KafkaConsumerWithCommitLog(
        {
            **kafka_config,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "true",
            "group.id": "test",
            "session.timeout.ms": 10000,
        },
        producer=fake_commit_log_producer,
        commit_log_topic=Topic("commit-log"),
    )
    data_producer = KafkaProducer(kafka_config)

    with closing(commit_log_consumer) as consumer:
        # The data producer is only needed to seed a single message, so it is
        # closed again as soon as that message has been delivered.
        with closing(data_producer) as producer:
            delivery = producer.produce(data_topic, next(get_payloads()))
            delivery.result(5.0)

        consumer.subscribe([data_topic])

        # XXX: getting the subscription is slow, hence the generous timeout.
        message = consumer.poll(10.0)
        assert isinstance(message, Message)

        # Stage and commit the position that follows the consumed message.
        committed_at = datetime.now()
        staged_position = Position(message.next_offset, committed_at)
        consumer.stage_positions({message.partition: staged_position})

        committed = consumer.commit_positions()
        assert committed == {expected_partition: staged_position}

        # The commit must have been mirrored once onto the commit log topic.
        recorded = fake_commit_log_producer.messages
        assert len(recorded) == 1
        commit_message = recorded[0]
        assert commit_message.topic() == "commit-log"

        decoded_commit = commit_codec.decode(
            KafkaPayload(
                commit_message.key(),
                commit_message.value(),
                commit_message.headers(),
            )
        )
        assert decoded_commit == Commit(
            "test", expected_partition, message.next_offset, committed_at
        )
def test_scheduler_consumer() -> None:
    """
    Drive the scheduler consumer with hand-built commit log messages and check
    how often it calls ``produce`` on its (mocked) producer.

    The test stores one subscription for partition 0 of the "events" entity,
    writes four commits (two per partition) to the commit log topic, lets the
    scheduler run, and expects exactly two ``produce`` calls.

    Uses the Kafka admin client / producer and the Redis-backed subscription
    store, so it presumably needs both services to be reachable.
    """
    # Remember the current value so it can be put back even when the test
    # fails; otherwise the override would leak into every later test.
    previous_partition_counts = getattr(settings, "TOPIC_PARTITION_COUNTS", {})
    settings.TOPIC_PARTITION_COUNTS = {"events": 2}
    try:
        # Reload after changing the setting (presumably because the module
        # reads it at import time -- confirm).
        importlib.reload(scheduler_consumer)

        admin_client = AdminClient(get_default_kafka_configuration())
        create_topics(admin_client, [SnubaTopic.COMMIT_LOG])

        metrics_backend = TestingMetricsBackend()
        entity_name = "events"
        entity = get_entity(EntityKey(entity_name))
        storage = entity.get_writable_storage()
        assert storage is not None
        stream_loader = storage.get_table_writer().get_stream_loader()

        commit_log_topic = Topic("snuba-commit-log")

        mock_scheduler_producer = mock.Mock()

        from snuba.redis import redis_client
        from snuba.subscriptions.data import PartitionId, SubscriptionData
        from snuba.subscriptions.entity_subscription import EventsSubscription
        from snuba.subscriptions.store import RedisSubscriptionDataStore

        entity_key = EntityKey(entity_name)
        partition_index = 0

        # Register a single subscription on partition 0.
        store = RedisSubscriptionDataStore(
            redis_client, entity_key, PartitionId(partition_index)
        )
        store.create(
            uuid.uuid4(),
            SubscriptionData(
                project_id=1,
                time_window_sec=60,
                resolution_sec=60,
                query="MATCH events SELECT count()",
                entity_subscription=EventsSubscription(data_dict={}),
            ),
        )

        builder = scheduler_consumer.SchedulerBuilder(
            entity_name,
            uuid.uuid1().hex,
            "events",
            mock_scheduler_producer,
            "latest",
            False,
            60 * 5,
            None,
            None,
            metrics_backend,
        )
        scheduler = builder.build_consumer()
        try:
            # Give the consumer time to join, then run a few iterations
            # before any commit is produced.
            time.sleep(2)
            for _ in range(3):
                scheduler._run_once()

            epoch = datetime(1970, 1, 1)

            # closing() guarantees the producer is closed even if a produce
            # call or delivery fails.
            with closing(
                KafkaProducer(
                    build_kafka_producer_configuration(
                        stream_loader.get_default_topic_spec().topic,
                    )
                )
            ) as producer:
                for partition, offset, orig_message_ts in [
                    (0, 0, epoch),
                    (1, 0, epoch + timedelta(minutes=1)),
                    (0, 1, epoch + timedelta(minutes=2)),
                    (1, 1, epoch + timedelta(minutes=3)),
                ]:
                    fut = producer.produce(
                        commit_log_topic,
                        payload=commit_codec.encode(
                            Commit(
                                "events",
                                Partition(commit_log_topic, partition),
                                offset,
                                orig_message_ts,
                            )
                        ),
                    )
                    # Block until delivery so commits arrive in this order.
                    fut.result()

            for _ in range(5):
                scheduler._run_once()
        finally:
            # Always release the consumer, also when an iteration raised.
            scheduler._shutdown()

        assert mock_scheduler_producer.produce.call_count == 2
    finally:
        # NOTE(review): scheduler_consumer is not reloaded again here (same
        # as before), so it may keep state derived from the overridden
        # partition counts -- confirm whether a second reload is needed.
        settings.TOPIC_PARTITION_COUNTS = previous_partition_counts
def test_executor_consumer() -> None:
    """
    End to end integration test

    Builds an executor consumer for the "events" entity, feeds it one
    scheduled subscription task through the scheduled subscriptions topic and
    checks that a message carrying the same subscription id can be read from
    the subscription results topic.
    """
    # NOTE(review): this config value is not reset at the end of the test --
    # confirm whether other tests rely on a different mode.
    state.set_config("subscription_mode_events", "new")
    admin_client = AdminClient(get_default_kafka_configuration())
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_SCHEDULED_EVENTS])
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS])

    dataset_name = "events"
    entity_name = "events"
    entity_key = EntityKey(entity_name)
    entity = get_entity(entity_key)
    storage = entity.get_writable_storage()
    assert storage is not None
    stream_loader = storage.get_table_writer().get_stream_loader()

    scheduled_result_topic_spec = stream_loader.get_subscription_result_topic_spec()
    assert scheduled_result_topic_spec is not None

    assigned = False

    def on_partitions_assigned(partitions: Mapping[Partition, int]) -> None:
        nonlocal assigned
        assigned = True

    result_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_result_topic_spec.topic)
    )
    # closing() makes sure both Kafka clients are released even when an
    # assertion below fails.
    with closing(result_producer):
        result_consumer = KafkaConsumer(
            build_kafka_consumer_configuration(
                scheduled_result_topic_spec.topic,
                uuid.uuid1().hex,
                auto_offset_reset="latest",
                strict_offset_reset=False,
            )
        )
        with closing(result_consumer):
            result_consumer.subscribe(
                [Topic(scheduled_result_topic_spec.topic_name)],
                on_assign=on_partitions_assigned,
            )

            # We need to wait for the consumer to receive partitions
            # otherwise, when we try to consume messages, we will not find
            # anything. Subscription is an async process.
            attempts = 10
            while attempts > 0 and not assigned:
                result_consumer.poll(1.0)
                attempts -= 1

            assert assigned, "Did not receive assignment within 10 attempts"

            consumer_group = uuid.uuid1().hex
            auto_offset_reset = "latest"
            strict_offset_reset = False
            executor = build_executor_consumer(
                dataset_name,
                [entity_name],
                consumer_group,
                result_producer,
                2,
                2,
                auto_offset_reset,
                strict_offset_reset,
                TestingMetricsBackend(),
                None,
            )
            for _ in range(4):
                # Give time to the executor to subscribe
                time.sleep(1)
                executor._run_once()

            # Produce a scheduled task to the scheduled subscriptions topic
            subscription_data = SubscriptionData(
                project_id=1,
                query="MATCH (events) SELECT count()",
                time_window_sec=60,
                resolution_sec=60,
                entity_subscription=EventsSubscription(data_dict={}),
            )

            task = ScheduledSubscriptionTask(
                timestamp=datetime(1970, 1, 1),
                task=SubscriptionWithMetadata(
                    entity_key,
                    Subscription(
                        SubscriptionIdentifier(
                            PartitionId(1),
                            uuid.UUID("91b46cb6224f11ecb2ddacde48001122"),
                        ),
                        subscription_data,
                    ),
                    1,
                ),
            )

            encoder = SubscriptionScheduledTaskEncoder()
            encoded_task = encoder.encode(task)

            scheduled_topic_spec = (
                stream_loader.get_subscription_scheduled_topic_spec()
            )
            assert scheduled_topic_spec is not None
            scheduled_topic = Topic(scheduled_topic_spec.topic_name)
            with closing(
                KafkaProducer(
                    build_kafka_producer_configuration(scheduled_topic_spec.topic)
                )
            ) as tasks_producer:
                # Wait for delivery before letting the executor run.
                tasks_producer.produce(scheduled_topic, payload=encoded_task).result()

            executor._run_once()
            executor.signal_shutdown()
            # Call run here so that the executor shuts down itself cleanly.
            executor.run()

            result = result_consumer.poll(5)
            assert result is not None, "Did not receive a result message"
            data = json.loads(result.payload.value)
            assert (
                data["payload"]["subscription_id"]
                == "1/91b46cb6224f11ecb2ddacde48001122"
            ), "Invalid subscription id"