Example #1
def test_get_kafka_consumer_cluster_options():
    cluster_options = get_kafka_consumer_cluster_options("default")
    assert (cluster_options["bootstrap.servers"] ==
            settings.KAFKA_CLUSTERS["default"]["common"]["bootstrap.servers"])

    with override_settings(KAFKA_CLUSTERS={
            "default": {
                "consumers": {
                    "bootstrap.servers": "my.other.server:9092"
                }
            }
    }):
        cluster_options = get_kafka_consumer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "my.other.server:9092"

    with override_settings(
            KAFKA_CLUSTERS={
                "default": {
                    "consumers": {
                        "bootstrap.servers": "my.other.server:9092"
                    },
                    # legacy config:
                    "security.protocol": "plain",
                    "bootstrap.servers": "my.legacy.server:9092",
                }
            }):
        cluster_options = get_kafka_consumer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "my.legacy.server:9092"
        assert "security.protocol" not in cluster_options
Example #2
def test_get_kafka_consumer_cluster_options_invalid():
    with override_settings(KAFKA_CLUSTERS={
            "default": {
                "common": {
                    "invalid.setting": "value"
                }
            }
    }):
        with pytest.raises(ValueError):
            get_kafka_consumer_cluster_options("default")
Example #3
def __init__(
    self,
    group_id,
    topic=None,
    commit_batch_size=100,
    initial_offset_reset="earliest",
    force_offset_reset=None,
):
    self.group_id = group_id
    if not topic:
        topic = settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS
    self.topic = topic
    cluster_name = settings.KAFKA_TOPICS[topic]["cluster"]
    self.commit_batch_size = commit_batch_size
    self.initial_offset_reset = initial_offset_reset
    self.offsets = {}
    self.consumer = None
    self.cluster_options = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        {
            "group.id": self.group_id,
            "session.timeout.ms": 6000,
            "auto.offset.reset": self.initial_offset_reset,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": self.initial_offset_reset
            },
        },
    )
    self.admin_cluster_options = kafka_config.get_kafka_admin_cluster_options(
        cluster_name, {"allow.auto.create.topics": "true"})
    self.resolve_partition_force_offset = self.offset_reset_name_to_func(
        force_offset_reset)
Example #4
    def create_consumer(
        self,
        topics,
        cluster_name,
        group_id,
        auto_offset_reset,
        queued_max_messages_kbytes,
        queued_min_messages,
    ):
        consumer_config = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            override_params={
                "enable.auto.commit": False,
                "group.id": group_id,
                "default.topic.config": {"auto.offset.reset": auto_offset_reset},
                # overridden to reduce memory usage when there's a large backlog
                "queued.max.messages.kbytes": queued_max_messages_kbytes,
                "queued.min.messages": queued_min_messages,
            },
        )

        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            conf = kafka_config.get_kafka_admin_cluster_options(
                cluster_name, override_params={"allow.auto.create.topics": "true"}
            )
            admin_client = AdminClient(conf)
            wait_for_topics(admin_client, topics)

        consumer = Consumer(consumer_config)

        return consumer
Example #5
def test_bootstrap_format():
    with override_settings(KAFKA_CLUSTERS={
            "default": {
                "common": {
                    "bootstrap.servers": ["I", "am", "a", "list"]
                }
            }
    }):
        with pytest.raises(ValueError):
            get_kafka_consumer_cluster_options("default")

    # legacy should not raise an error
    with override_settings(KAFKA_CLUSTERS={
            "default": {
                "bootstrap.servers": ["I", "am", "a", "list"]
            }
    }):
        cluster_options = get_kafka_producer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "I,am,a,list"

        cluster_options = get_kafka_consumer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "I,am,a,list"
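As asserted above, a list value for "bootstrap.servers" is rejected in the new-style ("common"/"consumers") layout but joined with commas for legacy flat configs. A minimal illustration of that behaviour is sketched below; normalize_bootstrap_servers is an invented helper for illustration only, not part of the real module.

# Illustration of the behaviour asserted by test_bootstrap_format, not the actual implementation.
def normalize_bootstrap_servers(value, is_legacy_config):
    if isinstance(value, (list, tuple)):
        if not is_legacy_config:
            # new-style configs must provide a comma-separated string
            raise ValueError("bootstrap.servers must be a string")
        # legacy configs may still pass a list; it is joined into a string
        return ",".join(value)
    return value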
Example #6
def test_legacy_custom_mix_customer():
    with override_settings(
            KAFKA_CLUSTERS={
                "default": {
                    "common": {
                        "bootstrap.servers": "new.server:9092",
                        "security.protocol": "plain"
                    },
                    "bootstrap.servers": ["old.server:9092"],
                },
            }):
        cluster_options = get_kafka_consumer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "old.server:9092"
        assert "security.protocol" not in cluster_options
Example #7
    def create_consumer(
        self,
        topics,
        cluster_name,
        group_id,
        auto_offset_reset,
        queued_max_messages_kbytes,
        queued_min_messages,
    ):
        consumer_config = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            override_params={
                "enable.auto.commit": False,
                "group.id": group_id,
                "default.topic.config": {
                    "auto.offset.reset": auto_offset_reset
                },
                # overridden to reduce memory usage when there's a large backlog
                "queued.max.messages.kbytes": queued_max_messages_kbytes,
                "queued.min.messages": queued_min_messages,
            },
        )

        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            conf = kafka_config.get_kafka_admin_cluster_options(
                cluster_name,
                override_params={"allow.auto.create.topics": "true"})
            admin_client = AdminClient(conf)
            wait_for_topics(admin_client, topics)

        consumer = Consumer(consumer_config)

        def on_partitions_assigned(consumer, partitions):
            logger.info("New partitions assigned: %r", partitions)

        def on_partitions_revoked(consumer, partitions):
            "Reset the current in-memory batch, letting the next consumer take over where we left off."
            logger.info("Partitions revoked: %r", partitions)
            self._flush(force=True)

        consumer.subscribe(topics,
                           on_assign=on_partitions_assigned,
                           on_revoke=on_partitions_revoked)

        return consumer
Example #8
def get_config(topic: str, group_id: str, auto_offset_reset: str) -> MutableMapping[Any, Any]:
    cluster_name: str = settings.KAFKA_TOPICS[topic]["cluster"]
    consumer_config: MutableMapping[Any, Any] = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        override_params={
            "auto.offset.reset": auto_offset_reset,
            "enable.auto.commit": False,
            "enable.auto.offset.store": False,
            "group.id": group_id,
            # `default.topic.config` is now deprecated.
            # More details: https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
            "default.topic.config": {"auto.offset.reset": auto_offset_reset},
            # overridden to reduce memory usage when there's a large backlog
            "queued.max.messages.kbytes": DEFAULT_QUEUED_MAX_MESSAGE_KBYTES,
            "queued.min.messages": DEFAULT_QUEUED_MIN_MESSAGES,
        },
    )
    return consumer_config
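get_config only assembles the option mapping; a caller would typically pass it straight to a confluent_kafka Consumer. A hypothetical usage sketch follows (the topic key and group id are invented placeholders).

from confluent_kafka import Consumer

# "ingest-events" stands in for a key of settings.KAFKA_TOPICS; any real key works the same way.
config = get_config(topic="ingest-events", group_id="my-consumer-group", auto_offset_reset="latest")
consumer = Consumer(config)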
Example #9
    def __init__(
        self,
        cluster_name,
        consumer_group,
        commit_log_topic,
        synchronize_commit_group,
        initial_offset_reset="latest",
        on_commit=None,
    ):
        self.cluster_name = cluster_name
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[
            initial_offset_reset]

        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        (
            self.__commit_log_consumer,
            self.__commit_log_consumer_stop_request,
        ) = self.__start_commit_log_consumer()

        self.__positions = {}

        def commit_callback(error, partitions):
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            override_params={
                "group.id": self.consumer_group,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "true",
                "enable.partition.eof": "false",
                "default.topic.config": {
                    "auto.offset.reset": "error"
                },
                "on_commit": commit_callback,
            },
        )

        self.__consumer = Consumer(consumer_configuration)
Example #10
    def __init__(
        self,
        group_id: str,
        topic: Optional[str] = None,
        commit_batch_size: int = 100,
        commit_batch_timeout_ms: int = 5000,
        initial_offset_reset: str = "earliest",
        force_offset_reset: Optional[str] = None,
    ):
        self.group_id = group_id
        if not topic:
            # TODO(typing): Need a way to get the actual value of settings to avoid this
            topic = cast(str, settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS)

        self.topic = topic
        cluster_name: str = settings.KAFKA_TOPICS[topic]["cluster"]
        self.commit_batch_size = commit_batch_size

        # Adding time based commit behaviour
        self.commit_batch_timeout_ms: int = commit_batch_timeout_ms
        self.__batch_deadline: Optional[float] = None

        self.initial_offset_reset = initial_offset_reset
        self.offsets: Dict[int, Optional[int]] = {}
        self.consumer: Consumer = None
        self.cluster_options = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            {
                "group.id": self.group_id,
                "session.timeout.ms": 6000,
                "auto.offset.reset": self.initial_offset_reset,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "false",
                "default.topic.config": {
                    "auto.offset.reset": self.initial_offset_reset
                },
            },
        )
        self.admin_cluster_options = kafka_config.get_kafka_admin_cluster_options(
            cluster_name, {"allow.auto.create.topics": "true"})
        self.resolve_partition_force_offset = self.offset_reset_name_to_func(
            force_offset_reset)
        self.__shutdown_requested = False
Example #11
def run_commit_log_consumer(
    cluster_name,
    consumer_group,
    commit_log_topic,
    partition_state_manager,
    synchronize_commit_group,
    start_event,
    stop_request_event,
):
    start_event.set()

    logging.debug("Starting commit log consumer...")

    positions = {}

    # NOTE: The commit log consumer group should not be persisted into the
    # ``__consumer_offsets`` topic since no offsets are committed by this
    # consumer. The group membership metadata messages will be published
    # initially but as long as this group remains a single consumer it will
    # be deleted after the consumer is closed.
    # It is very important to note that the ``group.id`` **MUST** be unique to
    # this consumer process!!! This ensures that it is able to consume from all
    # partitions of the commit log topic and get a comprehensive view of the
    # state of the consumer groups it is tracking.
    consumer_config = kafka_config.get_kafka_consumer_cluster_options(
        cluster_name,
        override_params={
            "group.id": consumer_group,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": "error"
            },
        },
    )
    consumer = Consumer(consumer_config)

    def rewind_partitions_on_assignment(consumer, assignment):
        # The commit log consumer must start consuming from the beginning of
        # the commit log topic to ensure that it has a comprehensive view of
        # all active partitions.
        consumer.assign([
            TopicPartition(
                i.topic, i.partition,
                positions.get((i.topic, i.partition), OFFSET_BEGINNING))
            for i in assignment
        ])

    consumer.subscribe([commit_log_topic],
                       on_assign=rewind_partitions_on_assignment)

    while not stop_request_event.is_set():
        message = consumer.poll(1)
        if message is None:
            continue

        error = message.error()
        if error is not None:
            raise Exception(error)

        positions[(message.topic(),
                   message.partition())] = message.offset() + 1

        group, topic, partition, offset = get_commit_data(message)
        if group != synchronize_commit_group:
            logger.debug(
                "Received consumer offsets update from %r, ignoring...", group)
            continue

        if offset in LOGICAL_OFFSETS:
            logger.debug("Skipping invalid logical offset (%r) from %s/%s...",
                         offset, topic, partition)
            continue
        elif offset < 0:
            logger.warning(
                "Received unexpected negative offset (%r) from %s/%s!", offset,
                topic, partition)

        partition_state_manager.set_remote_offset(topic, partition, offset)
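run_commit_log_consumer is designed to be driven from a dedicated thread: it signals start_event as soon as it starts and leaves its poll loop once stop_request_event is set. A rough wiring sketch follows, assuming run_commit_log_consumer and SynchronizedPartitionStateManager are importable from the module shown above; the cluster, group, and topic names are placeholders.

import threading

# No-op state change callback, purely for illustration.
partition_state_manager = SynchronizedPartitionStateManager(lambda *args, **kwargs: None)

start_event = threading.Event()
stop_request_event = threading.Event()
thread = threading.Thread(
    target=run_commit_log_consumer,
    kwargs={
        "cluster_name": "default",
        "consumer_group": "commit-log-reader",  # must be unique to this process (see NOTE above)
        "commit_log_topic": "snuba-commit-log",
        "partition_state_manager": partition_state_manager,
        "synchronize_commit_group": "snuba-consumers",
        "start_event": start_event,
        "stop_request_event": stop_request_event,
    },
    daemon=True,
)
thread.start()
start_event.wait()        # block until the commit log consumer loop is running
# ... later, request shutdown and wait for the thread to finish:
stop_request_event.set()
thread.join()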