Esempio n. 1
0
class KafkaConsumer:
    """Defines the base kafka consumer class.

    Wraps a ``Consumer`` or ``AvroConsumer`` subscribed to ``topic_name_pattern``
    and passes every message polled without error to ``message_handler``.
    """

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use.

        Args:
            topic_name_pattern: Topic name or pattern handed to ``subscribe``.
            message_handler: Callable invoked with each message that was
                polled without an error.
            is_avro: When truthy an ``AvroConsumer`` is created (with the
                schema registry URL added to the config), otherwise a plain
                ``Consumer``.
            offset_earliest: When truthy, ``auto.offset.reset`` is "earliest"
                and assigned partitions are rewound in ``on_assign``.
            sleep_secs: Seconds ``consume`` sleeps between drain passes.
            consume_timeout: Seconds each ``poll`` call in ``_consume`` waits.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9094",
            "group.id": "udacity",
            "auto.offset.reset": "earliest" if offset_earliest else "latest",
        }

        if is_avro:
            self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place.

        Rewinds every assigned partition to the beginning only when this
        consumer was created with ``offset_earliest``; the partitions are then
        assigned on ``consumer``.
        """
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic.

        Loops forever: drains messages through ``_consume`` until it returns 0,
        then awaits ``gen.sleep(self.sleep_secs)`` before the next pass.
        """
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise.

        A poll that yields nothing and a message carrying an error both count
        as "no message" (0). A good message is handed to ``message_handler``
        before 1 is returned.
        """
        message = self.consumer.poll(self.consume_timeout)

        # poll() yields None when nothing arrived within the timeout, so this
        # check must come before any method call on the message.
        if message is None:
            return 0

        error = message.error()
        if error is not None:
            logger.warning("Error from consumer %s", error)
            return 0

        self.message_handler(message)
        return 1

    def close(self):
        """Cleans up any open kafka consumers.

        Uses ``close()``: ``flush()`` is a producer method and is not part of
        the consumer API.
        """
        self.consumer.close()
Esempio n. 2
0
class KafkaConsumer:
    """Kafka consumer that polls a single topic pattern and logs what it reads."""

    def __init__(
        self,
        topic_name_pattern,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates the consumer and subscribes it to ``topic_name_pattern``.

        Args:
            topic_name_pattern: Topic name or pattern handed to ``subscribe``.
            offset_earliest: When truthy, ``auto.offset.reset`` is "earliest"
                and assigned partitions are rewound in ``on_assign``.
            sleep_secs: Seconds ``consume`` sleeps between drain passes.
            consume_timeout: Seconds each ``poll`` call in ``_consume`` waits.
        """
        self.topic_name_pattern = topic_name_pattern
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "group.id": "consumer",
            "auto.offset.reset": "earliest" if offset_earliest else "latest",
        }

        self.consumer = Consumer(self.broker_properties)

        # The callback has to be registered here, otherwise on_assign is never
        # invoked and the offset_earliest rewind never happens.
        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place.

        Rewinds every assigned partition to the beginning when the consumer was
        created with ``offset_earliest``, then assigns the partitions on
        ``consumer`` exactly once.
        """
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        # Assign the full list in a single call, after all offsets are set.
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic.

        Loops forever: drains messages through ``_consume`` until it returns 0,
        then awaits ``gen.sleep(self.sleep_secs)`` before the next pass.
        """
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()

            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise.

        A poll that yields nothing and a message carrying an error both return
        0; a good message has its value logged and returns 1.
        """
        message = self.consumer.poll(self.consume_timeout)
        if message is None:
            return 0

        error = message.error()
        if error is not None:
            # An error event is not a payload; report it and count it as nothing read.
            logger.warning("consumer error for %s: %s", self.topic_name_pattern, error)
            return 0

        logger.info("consumed message: %s", message.value())
        return 1

    def close(self):
        """Cleans up any open kafka consumers.

        Uses ``close()``: ``flush()` is a producer method and is not part of
        the consumer API.
        """
        self.consumer.close()
class KafkaConsumer:
    """Defines the base kafka consumer class.

    Wraps a ``Consumer`` or ``AvroConsumer`` subscribed to ``topic_name_pattern``
    and passes every message polled without error to ``message_handler``.
    """

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use.

        Args:
            topic_name_pattern: Topic name or pattern handed to ``subscribe``.
            message_handler: Callable invoked with each message that was
                polled without an error.
            is_avro: When truthy an ``AvroConsumer`` is created, otherwise a
                plain ``Consumer``.
            offset_earliest: When truthy, assigned partitions are rewound to
                the beginning in ``on_assign``.
            sleep_secs: Seconds ``consume`` sleeps between drain passes.
            consume_timeout: Seconds each ``poll`` call in ``_consume`` waits.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "group.id": "0",
            "schema.registry.url": SCHEMA_REGISTRY_URL,
        }

        if is_avro:
            self.consumer = AvroConsumer(config=self.broker_properties)
        else:
            # The plain consumer is given only the broker and group settings;
            # the schema registry URL is left out of its config.
            self.consumer = Consumer({
                "bootstrap.servers": self.broker_properties["bootstrap.servers"],
                "group.id": self.broker_properties["group.id"],
            })

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place.

        Rewinds every assigned partition to the beginning when the consumer was
        created with ``offset_earliest``, then assigns the partitions on
        ``consumer``.
        """
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic.

        Loops forever: drains messages through ``_consume`` until it returns 0,
        then awaits ``gen.sleep(self.sleep_secs)`` before the next pass.
        """
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise.

        Always returns an int so the ``num_results > 0`` comparison in
        ``consume`` is safe: a poll exception, an empty poll and a message
        carrying an error all return 0.
        """
        try:
            message = self.consumer.poll(self.consume_timeout)
        except Exception as e:
            # Best-effort polling: report the failure and let the caller retry
            # on its next pass instead of falling through with no message bound.
            logger.error("Poll exception %s: %s", self.topic_name_pattern, e)
            return 0

        if message is None:
            return 0

        error = message.error()
        if error is not None:
            logger.error("Consumer error %s: %s", self.topic_name_pattern, error)
            return 0

        self.message_handler(message)
        logger.info("Message consumed %s: %s", message.key(), message.value())
        return 1

    def close(self):
        """Cleans up any open kafka consumers.

        Uses ``close()``: ``flush()`` is a producer method and is not part of
        the consumer API. Does nothing when ``self.consumer`` is None.
        """
        if self.consumer is not None:
            self.consumer.close()