def test_default_config_cli_bootstrap_servers():
    """Bootstrap servers passed explicitly (CLI style) win and are comma-joined."""
    single = ["cli.server:9092"]
    config = get_default_kafka_configuration(bootstrap_servers=single)
    assert config["bootstrap.servers"] == "cli.server:9092"

    multiple = ["cli.server:9092", "cli2.server:9092"]
    config = get_default_kafka_configuration(bootstrap_servers=multiple)
    assert config["bootstrap.servers"] == "cli.server:9092,cli2.server:9092"
def test_default_config_legacy_override_default_servers():
    """Legacy settings.DEFAULT_BROKERS overrides the default bootstrap servers."""
    solo = "my.broker:9092"
    settings.DEFAULT_BROKERS = [solo]
    config = get_default_kafka_configuration()
    assert config["bootstrap.servers"] == solo

    pair = ["my.broker:9092", "my.second.broker:9092"]
    settings.DEFAULT_BROKERS = pair
    config = get_default_kafka_configuration()
    assert config["bootstrap.servers"] == ",".join(pair)
def test_default_config_legacy_override_storage_servers():
    """Legacy settings.DEFAULT_STORAGE_BROKERS overrides servers per storage."""
    name = StorageKey.EVENTS.value
    key = StorageKey(name)

    lone_broker = "my.broker:9092"
    settings.DEFAULT_STORAGE_BROKERS = {name: [lone_broker]}
    config = get_default_kafka_configuration(storage_key=key)
    assert config["bootstrap.servers"] == lone_broker

    brokers = ["my.broker:9092", "my.second.broker:9092"]
    settings.DEFAULT_STORAGE_BROKERS = {name: brokers}
    config = get_default_kafka_configuration(storage_key=key)
    assert config["bootstrap.servers"] == ",".join(brokers)
def test_default_config_override_new_config_storage():
    """Per-storage STORAGE_BROKER_CONFIG wins for its storage; other storages
    fall back to the global BROKER_CONFIG."""
    events_broker = "my.other.broker:9092"
    settings.STORAGE_BROKER_CONFIG = {
        StorageKey.EVENTS.value: {"bootstrap.servers": events_broker},
    }
    config = get_default_kafka_configuration(StorageKey.EVENTS)
    assert config["bootstrap.servers"] == events_broker

    fallback_broker = "another.broker:9092"
    settings.BROKER_CONFIG = {"bootstrap.servers": fallback_broker}
    config = get_default_kafka_configuration(StorageKey.ERRORS)
    assert config["bootstrap.servers"] == fallback_broker
def test_default_config_override_new_config():
    """settings.BROKER_CONFIG supplies the default bootstrap servers."""
    broker = "my.broker:9092"
    settings.BROKER_CONFIG = {"bootstrap.servers": broker}
    config = get_default_kafka_configuration()
    assert config["bootstrap.servers"] == broker
def test_default_config_legacy_override_storage_servers_fallback():
    """A storage missing from DEFAULT_STORAGE_BROKERS falls back to DEFAULT_BROKERS.

    DEFAULT_STORAGE_BROKERS only covers EVENTS here, so resolving the
    configuration for ERRORS must use the global DEFAULT_BROKERS value.
    """
    default_broker = "my.other.broker:9092"
    default_brokers = ["my.broker:9092", "my.second.broker:9092"]
    settings.DEFAULT_BROKERS = [default_broker]
    settings.DEFAULT_STORAGE_BROKERS = {
        StorageKey.EVENTS.value: default_brokers,
    }
    # StorageKey.ERRORS is already a StorageKey member; the previous
    # StorageKey(StorageKey.ERRORS) re-wrap was a redundant no-op.
    storage_key = StorageKey.ERRORS
    broker_config = get_default_kafka_configuration(storage_key=storage_key)
    assert broker_config["bootstrap.servers"] == default_broker
def test_default_config_new_fallback_old():
    """A storage without a STORAGE_BROKER_CONFIG entry uses legacy DEFAULT_BROKERS."""
    legacy_broker = "my.broker:9092"
    new_broker = "my.other.broker:9092"
    settings.DEFAULT_BROKERS = [legacy_broker]
    settings.STORAGE_BROKER_CONFIG = {
        StorageKey.EVENTS.value: {"bootstrap.servers": new_broker},
    }
    config = get_default_kafka_configuration(StorageKey.ERRORS)
    assert config["bootstrap.servers"] == legacy_broker
def __init__(
    self,
    storage_key: StorageKey,
    raw_topic: Optional[str],
    replacements_topic: Optional[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    bootstrap_servers: Sequence[str],
    group_id: str,
    commit_log_topic: Optional[str],
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    processes: Optional[int],
    input_block_size: Optional[int],
    output_block_size: Optional[int],
    commit_retry_policy: Optional[RetryPolicy] = None,
    profile_path: Optional[str] = None,
) -> None:
    """Resolve and store all configuration needed to build a consumer.

    Each topic argument (``raw_topic``, ``replacements_topic``,
    ``commit_log_topic``) may be None, in which case the default from the
    storage's stream loader is used; a missing replacement/commit-log spec
    leaves that topic disabled (None).
    """
    self.storage = get_writable_storage(storage_key)
    self.bootstrap_servers = bootstrap_servers
    # Consumer-side broker configuration resolved for this storage.
    self.broker_config = get_default_kafka_configuration(
        storage_key, bootstrap_servers=bootstrap_servers
    )
    # Producer-side configuration with overrides applied on top of the
    # storage defaults.
    self.producer_broker_config = build_kafka_producer_configuration(
        storage_key,
        bootstrap_servers=bootstrap_servers,
        override_params={
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        },
    )

    stream_loader = self.storage.get_table_writer().get_stream_loader()

    # Raw (input) topic: explicit override or the storage's default spec.
    self.raw_topic: Topic
    if raw_topic is not None:
        self.raw_topic = Topic(raw_topic)
    else:
        self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)

    # Replacements topic: explicit override, storage default, or disabled.
    self.replacements_topic: Optional[Topic]
    if replacements_topic is not None:
        self.replacements_topic = Topic(replacements_topic)
    else:
        replacement_topic_spec = stream_loader.get_replacement_topic_spec()
        if replacement_topic_spec is not None:
            self.replacements_topic = Topic(replacement_topic_spec.topic_name)
        else:
            self.replacements_topic = None

    # Commit log topic: explicit override, storage default, or disabled.
    self.commit_log_topic: Optional[Topic]
    if commit_log_topic is not None:
        self.commit_log_topic = Topic(commit_log_topic)
    else:
        commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
        if commit_log_topic_spec is not None:
            self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
        else:
            self.commit_log_topic = None

    # XXX: This can result in a producer being built in cases where it's
    # not actually required.
    self.producer = Producer(self.producer_broker_config)

    self.metrics = MetricsWrapper(
        environment.metrics,
        "consumer",
        tags={"group": group_id, "storage": storage_key.value},
    )

    self.max_batch_size = max_batch_size
    self.max_batch_time_ms = max_batch_time_ms
    self.group_id = group_id
    self.auto_offset_reset = auto_offset_reset
    self.queued_max_messages_kbytes = queued_max_messages_kbytes
    self.queued_min_messages = queued_min_messages
    self.processes = processes
    self.input_block_size = input_block_size
    self.output_block_size = output_block_size
    self.__profile_path = profile_path

    if commit_retry_policy is None:
        # Default policy: retry commits up to 3 times, 1s apart, only for
        # transient coordinator/timeout Kafka errors.
        commit_retry_policy = BasicRetryPolicy(
            3,
            constant_delay(1),
            lambda e: isinstance(e, KafkaException)
            and e.args[0].code()
            in (
                KafkaError.REQUEST_TIMED_OUT,
                KafkaError.NOT_COORDINATOR,
                KafkaError._WAIT_COORD,
            ),
        )
    self.__commit_retry_policy = commit_retry_policy
class TestStrictConsumer:
    """Exercises StrictConsumer against a fake confluent-kafka consumer."""

    # The broker is never contacted: _create_consumer is patched with a fake
    # in every test, so the bootstrap server value is arbitrary.
    broker_config = get_default_kafka_configuration(bootstrap_servers=["somewhere"])

    def __consumer(self, on_message) -> StrictConsumer:
        # Helper building a StrictConsumer wired to the given message callback.
        return StrictConsumer(
            topic="my_topic",
            group_id="something",
            broker_config=self.broker_config,
            initial_auto_offset_reset="earliest",
            partition_assignment_timeout=1,
            on_partitions_assigned=None,
            on_partitions_revoked=None,
            on_message=on_message,
        )

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_empty_topic(self, create_consumer) -> None:
        # An immediate end-of-partition marker means no payload is delivered.
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        on_message = MagicMock()
        consumer = self.__consumer(on_message)
        consumer.run()
        on_message.assert_not_called()

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_failure(self, create_consumer) -> None:
        # With no messages at all the consumer never receives a partition
        # assignment and must raise NoPartitionAssigned.
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer

        on_message = MagicMock()
        consumer = self.__consumer(on_message)
        with pytest.raises(NoPartitionAssigned):
            consumer.run()

        on_message.assert_not_called()

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_one_message(self, create_consumer) -> None:
        # One payload followed by EOF: delivered exactly once; nothing is
        # committed because the callback returns DO_NOT_COMMIT.
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer
        msg = build_confluent_kafka_message(0, 0, b"ABCABC", False)
        kafka_consumer.items = [
            msg,
            build_confluent_kafka_message(0, 0, None, True),
        ]

        on_message = MagicMock()
        on_message.return_value = CommitDecision.DO_NOT_COMMIT
        consumer = self.__consumer(on_message)
        consumer.run()
        on_message.assert_called_once_with(msg)
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_commits(self, create_consumer) -> None:
        # Three payloads with COMMIT_PREV produce two commit calls on the
        # fake consumer.
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer
        # NOTE(review): `error` is never referenced after setup — looks like
        # leftover scaffolding; confirm before removing.
        error = MagicMock()
        error.code.return_value = KafkaError._PARTITION_EOF
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, b"ABCABC", False),
            build_confluent_kafka_message(1, 0, b"ABCABC", False),
            build_confluent_kafka_message(2, 0, b"ABCABC", False),
            build_confluent_kafka_message(0, 0, None, True),
        ]

        on_message = MagicMock()
        on_message.return_value = CommitDecision.COMMIT_PREV
        consumer = self.__consumer(on_message)
        consumer.run()

        on_message.assert_called()
        assert kafka_consumer.commit_calls == 2
def bootstrap(
    *,
    bootstrap_server: Sequence[str],
    kafka: bool,
    migrate: bool,
    force: bool,
    log_level: Optional[str] = None,
) -> None:
    """
    Bootstrap a Snuba environment: optionally create the Kafka topics
    required by every writable storage, then optionally run migrations.

    Warning: Not intended to be used in production yet.
    """
    # Destructive/bootstrap operation: refuse to run unless explicitly forced.
    if not force:
        raise click.ClickException("Must use --force to run")

    setup_logging(log_level)

    logger = logging.getLogger("snuba.bootstrap")

    import time

    if kafka:
        logger.debug("Using Kafka with %r", bootstrap_server)
        from confluent_kafka.admin import AdminClient, NewTopic

        # Poll until Kafka is reachable, retrying once per second for up to
        # 60 attempts before giving up and re-raising the last error.
        attempts = 0
        while True:
            try:
                logger.debug("Attempting to connect to Kafka (attempt %d)", attempts)
                client = AdminClient(
                    get_default_kafka_configuration(
                        bootstrap_servers=bootstrap_server,
                        override_params={"socket.timeout.ms": 1000},
                    )
                )
                client.list_topics(timeout=1)
                break
            except Exception as e:
                logger.error(
                    "Connection to Kafka failed (attempt %d)", attempts, exc_info=e
                )
                attempts += 1
                if attempts == 60:
                    raise
                time.sleep(1)

        # Collect every topic required by the writable storages of the active
        # datasets, de-duplicated by topic name.
        topics = {}
        for name in ACTIVE_DATASET_NAMES:
            dataset = get_dataset(name)
            for entity in dataset.get_all_entities():
                writable_storage = entity.get_writable_storage()
                if writable_storage:
                    table_writer = writable_storage.get_table_writer()
                    stream_loader = table_writer.get_stream_loader()
                    for topic_spec in stream_loader.get_all_topic_specs():
                        if topic_spec.topic_name in topics:
                            continue
                        logger.debug(
                            "Adding topic %s to creation list", topic_spec.topic_name
                        )
                        topics[topic_spec.topic_name] = NewTopic(
                            topic_spec.topic_name,
                            num_partitions=topic_spec.partitions_number,
                            replication_factor=topic_spec.replication_factor,
                        )

        logger.debug("Initiating topic creation")
        for topic, future in client.create_topics(
            list(topics.values()), operation_timeout=1
        ).items():
            try:
                future.result()
                logger.info("Topic %s created", topic)
            except Exception as e:
                # Best-effort: a failure (e.g. topic already exists) is
                # logged but does not abort bootstrap.
                logger.error("Failed to create topic %s", topic, exc_info=e)

    if migrate:
        check_clickhouse_connections()
        Runner().run_all(force=True)
def test_default_config():
    """With no overrides, the configuration mirrors settings.BROKER_CONFIG."""
    config = get_default_kafka_configuration()
    expected = settings.BROKER_CONFIG["bootstrap.servers"]
    assert config["bootstrap.servers"] == expected
class TestBootstrapState:
    """Drives BootstrapState over a fake CDC control-topic consumer."""

    # The broker is never contacted: _create_consumer is patched with a fake
    # in every test, so the bootstrap server value is arbitrary.
    broker_config = get_default_kafka_configuration(bootstrap_servers=["somewhere"])

    def __consumer(self, on_message) -> StrictConsumer:
        # NOTE(review): the sibling StrictConsumer test suite passes
        # `initial_auto_offset_reset=` to this same constructor while this
        # helper passes `auto_offset_reset=` — confirm which keyword matches
        # StrictConsumer's signature. This helper also appears unused by the
        # tests below, which build BootstrapState directly.
        return StrictConsumer(
            topic="topic",
            group_id="something",
            broker_config=self.broker_config,
            auto_offset_reset="earliest",
            partition_assignment_timeout=1,
            on_partitions_assigned=None,
            on_partitions_revoked=None,
            on_message=on_message,
        )

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_empty_topic(self, create_consumer) -> None:
        # Empty control topic -> NO_SNAPSHOT, nothing committed.
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.NO_SNAPSHOT
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_snapshot_for_other_table(self, create_consumer) -> None:
        # A snapshot-init for a table we don't manage is committed past
        # but does not start a snapshot.
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "tables": ["someone_else"], "product":"snuba", "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.NO_SNAPSHOT
        assert kafka_consumer.commit_calls == 1

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_init_snapshot(self, create_consumer) -> None:
        # A snapshot-init for our table yields SNAPSHOT_INIT_RECEIVED with
        # no commit (the snapshot is still in flight).
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "tables": ["sentry_groupedmessage"], "product":"snuba", "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_snapshot_loaded(self, create_consumer) -> None:
        # init (other product) -> init (ours) -> snapshot-loaded ends in
        # SNAPSHOT_READY_RECEIVED with two commits recorded by the fake.
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "product":"somewhere-else", "tables": [], "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(
                1,
                0,
                b'{"snapshot-id":"abc123", "product":"snuba", "tables": ["sentry_groupedmessage"], "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(
                2,
                0,
                (
                    b'{"snapshot-id":"abc123", "event":"snapshot-loaded",'
                    b'"transaction-info": {"xmin":123, "xmax":124, "xip-list": []}'
                    b"}"
                ),
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED
        assert kafka_consumer.commit_calls == 2
class KafkaStreamsTestCase(StreamsTestMixin[KafkaPayload], TestCase):
    """Integration tests for the Kafka streams backend against a live broker.

    Each test creates (and tears down) a uniquely-named topic via the admin
    client, so these tests require a reachable Kafka cluster.
    """

    configuration = get_default_kafka_configuration()

    @contextlib.contextmanager
    def get_topic(self, partitions: int = 1) -> Iterator[Topic]:
        # Create a uniquely named topic for the duration of the context and
        # delete it on exit, asserting both admin operations succeed.
        name = f"test-{uuid.uuid1().hex}"
        client = AdminClient(self.configuration)
        [[key, future]] = client.create_topics(
            [NewTopic(name, num_partitions=partitions, replication_factor=1)]
        ).items()
        assert key == name
        assert future.result() is None
        try:
            yield Topic(name)
        finally:
            [[key, future]] = client.delete_topics([name]).items()
            assert key == name
            assert future.result() is None

    def get_consumer(
        self,
        group: Optional[str] = None,
        enable_end_of_partition: bool = True,
        auto_offset_reset: str = "earliest",
    ) -> KafkaConsumer:
        # Build a consumer with auto-commit disabled so tests control
        # offsets explicitly; a random group is used unless one is given.
        return KafkaConsumer(
            {
                **self.configuration,
                "auto.offset.reset": auto_offset_reset,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": enable_end_of_partition,
                "group.id": group if group is not None else uuid.uuid1().hex,
                "session.timeout.ms": 10000,
            },
        )

    def get_producer(self) -> KafkaProducer:
        return KafkaProducer(self.configuration)

    def get_payloads(self) -> Iterator[KafkaPayload]:
        # Endless stream of keyless payloads with incrementing byte values.
        for i in itertools.count():
            yield KafkaPayload(None, f"{i}".encode("utf8"), [])

    def test_auto_offset_reset_earliest(self) -> None:
        # With "earliest", a new consumer sees the already-produced message.
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(self.get_consumer(auto_offset_reset="earliest")) as consumer:
                consumer.subscribe([topic])

                message = consumer.poll(10.0)
                assert isinstance(message, Message)
                assert message.offset == 0

    def test_auto_offset_reset_latest(self) -> None:
        # With "latest", a new consumer skips the existing message and hits
        # end-of-partition at offset 1 instead.
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(self.get_consumer(auto_offset_reset="latest")) as consumer:
                consumer.subscribe([topic])

                try:
                    consumer.poll(10.0)  # XXX: getting the subscription is slow
                except EndOfPartition as error:
                    assert error.partition == Partition(topic, 0)
                    assert error.offset == 1
                else:
                    raise AssertionError("expected EndOfPartition error")

    def test_auto_offset_reset_error(self) -> None:
        # An invalid reset policy surfaces as a ConsumerError on poll.
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(self.get_consumer(auto_offset_reset="error")) as consumer:
                consumer.subscribe([topic])

                with pytest.raises(ConsumerError):
                    consumer.poll(10.0)  # XXX: getting the subscription is slow

    def test_commit_log_consumer(self) -> None:
        # XXX: This would be better as an integration test (or at least a test
        # against an abstract Producer interface) instead of against a test against
        # a mock.
        commit_log_producer = FakeConfluentKafkaProducer()

        consumer: KafkaConsumer = KafkaConsumerWithCommitLog(
            {
                **self.configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            },
            producer=commit_log_producer,
            commit_log_topic=Topic("commit-log"),
        )

        with self.get_topic() as topic, closing(consumer) as consumer:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            consumer.subscribe([topic])

            message = consumer.poll(10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)

            # Committing should emit exactly one record on the commit log
            # topic encoding (group, partition, offset).
            consumer.stage_offsets({message.partition: message.next_offset})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.next_offset
            }

            assert len(commit_log_producer.messages) == 1
            commit_message = commit_log_producer.messages[0]
            assert commit_message.topic() == "commit-log"

            assert commit_codec.decode(
                KafkaPayload(
                    commit_message.key(),
                    commit_message.value(),
                    commit_message.headers(),
                )
            ) == Commit("test", Partition(topic, 0), message.next_offset)