def test_get_kafka_consumer_cluster_options():
    """Consumer options come from ``common``, may be overridden by the
    ``consumers`` section, and legacy top-level keys take precedence when
    present (legacy config also drops new-style-only keys)."""
    opts = get_kafka_consumer_cluster_options("default")
    expected = settings.KAFKA_CLUSTERS["default"]["common"]["bootstrap.servers"]
    assert opts["bootstrap.servers"] == expected

    consumers_only = {
        "default": {"consumers": {"bootstrap.servers": "my.other.server:9092"}}
    }
    with override_settings(KAFKA_CLUSTERS=consumers_only):
        opts = get_kafka_consumer_cluster_options("default")
        assert opts["bootstrap.servers"] == "my.other.server:9092"

    legacy_mix = {
        "default": {
            "consumers": {"bootstrap.servers": "my.other.server:9092"},
            # legacy config:
            "security.protocol": "plain",
            "bootstrap.servers": "my.legacy.server:9092",
        }
    }
    with override_settings(KAFKA_CLUSTERS=legacy_mix):
        opts = get_kafka_consumer_cluster_options("default")
        assert opts["bootstrap.servers"] == "my.legacy.server:9092"
        assert "security.protocol" not in opts
def test_get_kafka_consumer_cluster_options_invalid():
    """An unrecognized key under ``common`` must be rejected with ValueError."""
    bad_clusters = {"default": {"common": {"invalid.setting": "value"}}}
    with override_settings(KAFKA_CLUSTERS=bad_clusters), pytest.raises(ValueError):
        get_kafka_consumer_cluster_options("default")
def __init__(
    self,
    group_id,
    topic=None,
    commit_batch_size=100,
    initial_offset_reset="earliest",
    force_offset_reset=None,
):
    """Configure (but do not start) a subscription-results consumer.

    Args:
        group_id: Kafka consumer group id.
        topic: topic to consume; defaults to
            ``settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS`` when falsy.
        commit_batch_size: number of messages processed between commits.
        initial_offset_reset: ``auto.offset.reset`` strategy for new groups.
        force_offset_reset: optional named reset strategy resolved via
            ``offset_reset_name_to_func``.
    """
    self.group_id = group_id
    # Fall back to the default subscription-results topic when none is given.
    self.topic = topic or settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS
    self.commit_batch_size = commit_batch_size
    self.initial_offset_reset = initial_offset_reset
    self.offsets = {}
    # Created lazily; None until the consumer is actually started.
    self.consumer = None

    cluster_name = settings.KAFKA_TOPICS[self.topic]["cluster"]
    consumer_overrides = {
        "group.id": self.group_id,
        "session.timeout.ms": 6000,
        "auto.offset.reset": self.initial_offset_reset,
        "enable.auto.commit": "false",
        "enable.auto.offset.store": "false",
        "enable.partition.eof": "false",
        "default.topic.config": {
            "auto.offset.reset": self.initial_offset_reset
        },
    }
    self.cluster_options = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name, consumer_overrides)
    self.admin_cluster_options = kafka_config.get_kafka_admin_cluster_options(
        cluster_name, {"allow.auto.create.topics": "true"})
    self.resolve_partition_force_offset = self.offset_reset_name_to_func(
        force_offset_reset)
def create_consumer(
    self,
    topics,
    cluster_name,
    group_id,
    auto_offset_reset,
    queued_max_messages_kbytes,
    queued_min_messages,
):
    """Build a Kafka ``Consumer`` for ``cluster_name``, pre-creating
    ``topics`` first when auto-creation is enabled in settings.

    Auto-commit is disabled (offsets are committed manually by the caller)
    and queue sizes are capped to limit memory use on large backlogs.
    """
    overrides = {
        "enable.auto.commit": False,
        "group.id": group_id,
        "default.topic.config": {"auto.offset.reset": auto_offset_reset},
        # overridden to reduce memory usage when there's a large backlog
        "queued.max.messages.kbytes": queued_max_messages_kbytes,
        "queued.min.messages": queued_min_messages,
    }
    consumer_config = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name, override_params=overrides)

    if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
        # This is required for confluent-kafka>=1.5.0, otherwise the topics will
        # not be automatically created.
        admin_conf = kafka_config.get_kafka_admin_cluster_options(
            cluster_name, override_params={"allow.auto.create.topics": "true"}
        )
        wait_for_topics(AdminClient(admin_conf), topics)

    return Consumer(consumer_config)
def test_bootstrap_format():
    """A list-valued ``bootstrap.servers`` is rejected in new-style config
    but joined into a comma-separated string for legacy config."""
    new_style = {
        "default": {"common": {"bootstrap.servers": ["I", "am", "a", "list"]}}
    }
    with override_settings(KAFKA_CLUSTERS=new_style), pytest.raises(ValueError):
        get_kafka_consumer_cluster_options("default")

    # legacy should not raise an error
    legacy = {"default": {"bootstrap.servers": ["I", "am", "a", "list"]}}
    with override_settings(KAFKA_CLUSTERS=legacy):
        for get_options in (
            get_kafka_producer_cluster_options,
            get_kafka_consumer_cluster_options,
        ):
            assert get_options("default")["bootstrap.servers"] == "I,am,a,list"
def test_legacy_custom_mix_customer():
    """When legacy top-level keys coexist with a new-style ``common`` section,
    the legacy values win and new-style-only keys are dropped."""
    mixed = {
        "default": {
            "common": {
                "bootstrap.servers": "new.server:9092",
                "security.protocol": "plain",
            },
            "bootstrap.servers": ["old.server:9092"],
        },
    }
    with override_settings(KAFKA_CLUSTERS=mixed):
        opts = get_kafka_consumer_cluster_options("default")
        assert opts["bootstrap.servers"] == "old.server:9092"
        assert "security.protocol" not in opts
def create_consumer(
    self,
    topics,
    cluster_name,
    group_id,
    auto_offset_reset,
    queued_max_messages_kbytes,
    queued_min_messages,
):
    """Create a Kafka ``Consumer`` subscribed to ``topics``.

    Auto-commit is disabled (``enable.auto.commit: False``) and queue sizes
    are overridden to limit memory usage on large backlogs. Rebalance
    callbacks are registered so that a partition revocation force-flushes
    the current in-memory batch via ``self._flush``.
    """
    consumer_config = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        override_params={
            "enable.auto.commit": False,
            "group.id": group_id,
            "default.topic.config": {
                "auto.offset.reset": auto_offset_reset
            },
            # overridden to reduce memory usage when there's a large backlog
            "queued.max.messages.kbytes": queued_max_messages_kbytes,
            "queued.min.messages": queued_min_messages,
        },
    )

    if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
        # This is required for confluent-kafka>=1.5.0, otherwise the topics will
        # not be automatically created.
        conf = kafka_config.get_kafka_admin_cluster_options(
            cluster_name, override_params={"allow.auto.create.topics": "true"})
        admin_client = AdminClient(conf)
        # Block until all requested topics exist before subscribing.
        wait_for_topics(admin_client, topics)

    consumer = Consumer(consumer_config)

    def on_partitions_assigned(consumer, partitions):
        # Purely informational; no state needs to change on assignment.
        logger.info("New partitions assigned: %r", partitions)

    def on_partitions_revoked(consumer, partitions):
        "Reset the current in-memory batch, letting the next consumer take over where we left off."
        logger.info("Partitions revoked: %r", partitions)
        self._flush(force=True)

    consumer.subscribe(topics,
                       on_assign=on_partitions_assigned,
                       on_revoke=on_partitions_revoked)

    return consumer
def get_config(topic: str, group_id: str, auto_offset_reset: str) -> MutableMapping[Any, Any]:
    """Return consumer configuration for the cluster that hosts ``topic``.

    Disables auto-commit/offset-store and caps queue sizes to keep memory
    bounded when there is a large backlog.
    """
    overrides: MutableMapping[Any, Any] = {
        "auto.offset.reset": auto_offset_reset,
        "enable.auto.commit": False,
        "enable.auto.offset.store": False,
        "group.id": group_id,
        # `default.topic.config` is now deprecated.
        # More details: https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration)
        "default.topic.config": {"auto.offset.reset": auto_offset_reset},
        # overridden to reduce memory usage when there's a large backlog
        "queued.max.messages.kbytes": DEFAULT_QUEUED_MAX_MESSAGE_KBYTES,
        "queued.min.messages": DEFAULT_QUEUED_MIN_MESSAGES,
    }
    cluster_name: str = settings.KAFKA_TOPICS[topic]["cluster"]
    return kafka_config.get_kafka_consumer_cluster_options(
        cluster_name, override_params=overrides)
def __init__(
    self,
    cluster_name,
    consumer_group,
    commit_log_topic,
    synchronize_commit_group,
    initial_offset_reset="latest",
    on_commit=None,
):
    """Set up a consumer synchronized against another group's commit log.

    Starts a background commit-log consumer, then builds the main consumer
    with auto-commit disabled and ``auto.offset.reset: error`` so that an
    out-of-range seek fails loudly instead of silently resetting.

    Args:
        cluster_name: key into the Kafka cluster configuration.
        consumer_group: group id for this consumer.
        commit_log_topic: topic carrying the leader group's commit records.
        synchronize_commit_group: group whose committed offsets gate consumption.
        initial_offset_reset: name of a strategy in
            ``initial_offset_reset_strategies``.
        on_commit: optional callback invoked with ``(error, partitions)``
            after each commit.
    """
    self.cluster_name = cluster_name
    self.consumer_group = consumer_group
    self.commit_log_topic = commit_log_topic
    self.synchronize_commit_group = synchronize_commit_group
    # Resolve the strategy name to its implementation; raises KeyError on
    # an unknown name.
    self.initial_offset_reset = self.initial_offset_reset_strategies[
        initial_offset_reset]
    self.__partition_state_manager = SynchronizedPartitionStateManager(
        self.__on_partition_state_change)
    # Starts the background commit-log consumer immediately; keep both the
    # handle and its stop-request event for shutdown.
    (
        self.__commit_log_consumer,
        self.__commit_log_consumer_stop_request,
    ) = self.__start_commit_log_consumer()
    self.__positions = {}

    def commit_callback(error, partitions):
        # Forward commit results to the caller-supplied hook, if any.
        if on_commit is not None:
            return on_commit(error, partitions)

    consumer_configuration = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        override_params={
            "group.id": self.consumer_group,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": "error"
            },
            "on_commit": commit_callback,
        },
    )

    self.__consumer = Consumer(consumer_configuration)
def __init__(
    self,
    group_id: str,
    topic: Optional[str] = None,
    commit_batch_size: int = 100,
    commit_batch_timeout_ms: int = 5000,
    initial_offset_reset: str = "earliest",
    force_offset_reset: Optional[str] = None,
):
    """Configure (but do not start) a subscription-results consumer.

    Commits happen either after ``commit_batch_size`` messages or after
    ``commit_batch_timeout_ms`` milliseconds, whichever comes first.

    Args:
        group_id: Kafka consumer group id.
        topic: topic to consume; defaults to
            ``settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS`` when falsy.
        commit_batch_size: number of messages processed between commits.
        commit_batch_timeout_ms: max time before a batch is committed.
        initial_offset_reset: ``auto.offset.reset`` strategy for new groups.
        force_offset_reset: optional named reset strategy resolved via
            ``offset_reset_name_to_func``.
    """
    self.group_id = group_id
    if not topic:
        # TODO(typing): Need a way to get the actual value of settings to avoid this
        topic = cast(str, settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS)
    self.topic = topic
    cluster_name: str = settings.KAFKA_TOPICS[topic]["cluster"]
    self.commit_batch_size = commit_batch_size
    # Adding time based commit behaviour
    self.commit_batch_timeout_ms: int = commit_batch_timeout_ms
    # Wall-clock deadline for the current batch; None when no batch is open.
    self.__batch_deadline: Optional[float] = None
    self.initial_offset_reset = initial_offset_reset
    # Per-partition offsets pending commit.
    self.offsets: Dict[int, Optional[int]] = {}
    # Created lazily; None until the consumer is actually started.
    self.consumer: Optional[Consumer] = None
    self.cluster_options = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        {
            "group.id": self.group_id,
            "session.timeout.ms": 6000,
            "auto.offset.reset": self.initial_offset_reset,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": self.initial_offset_reset
            },
        },
    )
    self.admin_cluster_options = kafka_config.get_kafka_admin_cluster_options(
        cluster_name, {"allow.auto.create.topics": "true"})
    self.resolve_partition_force_offset = self.offset_reset_name_to_func(
        force_offset_reset)
    self.__shutdown_requested = False
def run_commit_log_consumer(
    cluster_name,
    consumer_group,
    commit_log_topic,
    partition_state_manager,
    synchronize_commit_group,
    start_event,
    stop_request_event,
):
    """Consume the commit log topic and feed remote offsets into
    ``partition_state_manager`` until ``stop_request_event`` is set.

    ``start_event`` is set immediately so the spawning thread knows this
    worker is alive. Only commit records from ``synchronize_commit_group``
    are applied; updates from other groups are ignored.
    """
    start_event.set()
    logging.debug("Starting commit log consumer...")

    # Last consumed position per (topic, partition), used to resume after a
    # rebalance without replaying already-seen commit records.
    positions = {}

    # NOTE: The commit log consumer group should not be persisted into the
    # ``__consumer_offsets`` topic since no offsets are committed by this
    # consumer. The group membership metadata messages will be published
    # initially but as long as this group remains a single consumer it will
    # be deleted after the consumer is closed.
    # It is very important to note that the ``group.id`` **MUST** be unique to
    # this consumer process!!! This ensures that it is able to consume from all
    # partitions of the commit log topic and get a comprehensive view of the
    # state of the consumer groups it is tracking.
    consumer_config = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        override_params={
            "group.id": consumer_group,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": "error"
            },
        },
    )
    consumer = Consumer(consumer_config)

    def rewind_partitions_on_assignment(consumer, assignment):
        # The commit log consumer must start consuming from the beginning of
        # the commit log topic to ensure that it has a comprehensive view of
        # all active partitions.
        consumer.assign([
            TopicPartition(
                i.topic, i.partition,
                positions.get((i.topic, i.partition), OFFSET_BEGINNING))
            for i in assignment
        ])

    consumer.subscribe([commit_log_topic],
                       on_assign=rewind_partitions_on_assignment)

    while not stop_request_event.is_set():
        message = consumer.poll(1)
        if message is None:
            continue

        error = message.error()
        if error is not None:
            raise Exception(error)

        # Record the next position so a rebalance resumes after this record.
        positions[(message.topic(), message.partition())] = message.offset() + 1

        group, topic, partition, offset = get_commit_data(message)
        if group != synchronize_commit_group:
            logger.debug(
                "Received consumer offsets update from %r, ignoring...", group)
            continue

        if offset in LOGICAL_OFFSETS:
            logger.debug("Skipping invalid logical offset (%r) from %s/%s...",
                         offset, topic, partition)
            continue
        elif offset < 0:
            # NOTE(review): a negative offset is only warned about here and is
            # still forwarded to set_remote_offset below — confirm intentional.
            logger.warning(
                "Received unexpected negative offset (%r) from %s/%s!",
                offset, topic, partition)

        partition_state_manager.set_remote_offset(topic, partition, offset)