Ejemplo n.º 1
0
class IndexedConsumer():
    """
    A simple consumer to retrieve messages from the input queue when it is time to send them
    """
    def __init__(self, input_topic, hosts):
        self.input_topic = input_topic
        self.consumer = KafkaConsumer(bootstrap_servers=hosts)

    def retrieve_event(self, event_reference):
        self.consumer.set_topic_partitions(
            (self.input_topic, event_reference.partition,
             event_reference.offset))
        message = self.consumer.next()
        event = ScheduledEvent.from_dict(json.loads(message.value))
        return event
Ejemplo n.º 2
0
class KafkaGroupReader:

    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.finished_partitions = set()

    def read_groups(self):
        self.log.info("Kafka consumer running")
        self.consumer = KafkaConsumer(
            CONSUMER_OFFSET_TOPIC,
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='smallest',
            auto_commit_enable=False,
            consumer_timeout_ms=10000,
        )
        self.log.info("Consumer ready")
        self.watermarks = self.get_current_watermarks()
        while not self.finished():
            try:
                message = self.consumer.next()
                max_offset = self.get_max_offset(message.partition)
                if message.offset >= max_offset - 1:
                    self.finished_partitions.add(message.partition)
            except ConsumerTimeout:
                break
            except (
                    FailedPayloadsError,
                    KafkaUnavailableError,
                    LeaderNotAvailableError,
                    NotLeaderForPartitionError,
            ) as e:
                self.log.warning("Got %s, retrying", e.__class__.__name__)
            self.process_consumer_offset_message(message)
        return self.kafka_groups

    def parse_consumer_offset_message(self, message):
        key = bytearray(message.key)
        ((key_schema,), cur) = relative_unpack('>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException()   # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack('>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema,), cur) = relative_unpack('>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset,), cur) = relative_unpack('>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        if offset:
            self.kafka_groups[group].add(topic)
        else:  # No offset means group deletion
            self.kafka_groups.pop(group, None)

    def get_current_watermarks(self):
        self.consumer._client.load_metadata_for_topics()
        offsets = get_topics_watermarks(
            self.consumer._client,
            [CONSUMER_OFFSET_TOPIC],
        )
        return {partition: offset for partition, offset
                in offsets[CONSUMER_OFFSET_TOPIC].iteritems()
                if offset.highmark > offset.lowmark}

    def get_max_offset(self, partition):
        return self.watermarks[partition].highmark

    def finished(self):
        return len(self.finished_partitions) >= len(self.watermarks)
Ejemplo n.º 3
0
class KafkaGroupReader:
    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.finished_partitions = set()

    def read_groups(self):
        self.log.info("Kafka consumer running")
        self.consumer = KafkaConsumer(
            CONSUMER_OFFSET_TOPIC,
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='smallest',
            auto_commit_enable=False,
            consumer_timeout_ms=10000,
        )
        self.log.info("Consumer ready")
        self.watermarks = self.get_current_watermarks()
        while not self.finished():
            try:
                message = self.consumer.next()
                max_offset = self.get_max_offset(message.partition)
                if message.offset >= max_offset - 1:
                    self.finished_partitions.add(message.partition)
            except ConsumerTimeout:
                break
            except (
                    FailedPayloadsError,
                    KafkaUnavailableError,
                    LeaderNotAvailableError,
                    NotLeaderForPartitionError,
            ) as e:
                self.log.warning("Got %s, retrying", e.__class__.__name__)
            self.process_consumer_offset_message(message)
        return self.kafka_groups

    def parse_consumer_offset_message(self, message):
        key = bytearray(message.key)
        ((key_schema, ), cur) = relative_unpack('>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException(
            )  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition, ), cur) = relative_unpack('>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema, ), cur) = relative_unpack('>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset, ), cur) = relative_unpack('>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(
                message)
        except InvalidMessageException:
            return

        if offset:
            self.kafka_groups[group].add(topic)
        else:  # No offset means group deletion
            self.kafka_groups.pop(group, None)

    def get_current_watermarks(self):
        self.consumer._client.load_metadata_for_topics()
        offsets = get_topics_watermarks(
            self.consumer._client,
            [CONSUMER_OFFSET_TOPIC],
        )
        return {
            partition: offset
            for partition, offset in
            offsets[CONSUMER_OFFSET_TOPIC].iteritems()
            if offset.highmark > offset.lowmark
        }

    def get_max_offset(self, partition):
        return self.watermarks[partition].highmark

    def finished(self):
        return len(self.finished_partitions) >= len(self.watermarks)
Ejemplo n.º 4
0
class KafkaGroupReader:
    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition).get(group_id, [])

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(
                    CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(
            self.active_partitions.values())
        # Active partitions are not empty. Remove the empty ones.
        self.active_partitions = {
            p: tp
            for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks
            and self.watermarks[tp.partition].highmark > 0 and self.watermarks[
                tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(self.active_partitions.values())
        self.log.info("Consuming from %s", self.active_partitions)
        while not self.finished():
            try:
                message = self.consumer.next()
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[
                    message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        return {
            group: topics
            for group, topics in self.kafka_groups.items() if topics
        }

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [(p, self.consumer.position(p))
                     for p in self.active_partitions.values()]
        self.consumer.assign(self.active_partitions.values())
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = bytearray(message.key)
        ((key_schema, ), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException(
            )  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition, ), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema, ), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset, ), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(
                message)
        except InvalidMessageException:
            return

        if offset and (group not in self.kafka_groups
                       or topic not in self.kafka_groups[group]):
            self.kafka_groups[group].add(topic)
            self.log.info("Added group %s topic %s to list of groups", group,
                          topic)
        elif not offset and group in self.kafka_groups and \
                topic in self.kafka_groups[group]:  # No offset means topic deletion
            self.kafka_groups[group].discard(topic)
            self.log.info("Removed group %s topic %s from list of groups",
                          group, topic)

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client,
            [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition
                             for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in offsets[CONSUMER_OFFSET_TOPIC].iteritems()
            if offset.highmark > offset.lowmark and (
                partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
Ejemplo n.º 5
0
Copyright (c) 2019 Arshdeep Bahga and Vijay Madisetti

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

from kafka.client import KafkaClient
from kafka.consumer import KafkaConsumer

client = KafkaClient("localhost:6667")
consumer = KafkaConsumer("test", metadata_broker_list=['localhost:6667'])

while True:
    data = consumer.next().value
    print data