class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use.

        topic_name_pattern -- topic (or pattern) to subscribe to
        message_handler    -- callable invoked with each received message
        is_avro            -- use AvroConsumer with a schema registry when True
        offset_earliest    -- rewind assigned partitions to the beginning
        sleep_secs         -- async sleep between empty poll cycles
        consume_timeout    -- per-poll timeout in seconds
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9094",
            "group.id": "udacity",
            "auto.offset.reset": "earliest" if offset_earliest else "latest",
        }

        if is_avro is True:
            self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # Only rewind when configured to do so; the original reset every
        # partition's offset unconditionally, ignoring `offset_earliest`.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING
        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        # Use the configured timeout instead of a hard-coded 1.0 seconds.
        message = self.consumer.poll(self.consume_timeout)
        # poll() returns None on timeout -- must be checked before .error(),
        # otherwise every idle poll raises AttributeError.
        if message is None:
            return 0
        if message.error() is not None:
            logger.warning(f"Error from consumer {message.error()}")
            return 0
        # Hand the message to the registered handler (was missing entirely).
        self.message_handler(message)
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        # Consumer has no flush(); close() commits offsets and leaves the group.
        self.consumer.close()
class KafkaConsumer:
    """Kafka consumer that polls a topic pattern and logs received messages."""

    def __init__(
        self,
        topic_name_pattern,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use.

        topic_name_pattern -- topic (or pattern) to subscribe to
        offset_earliest    -- rewind assigned partitions to the beginning
        sleep_secs         -- async sleep between empty poll cycles
        consume_timeout    -- per-poll timeout in seconds
        """
        self.topic_name_pattern = topic_name_pattern
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "group.id": "consumer",
            "auto.offset.reset": "earliest" if offset_earliest else "latest",
        }

        self.consumer = Consumer(self.broker_properties)
        # Register on_assign so `offset_earliest` actually takes effect;
        # the original never wired the callback up, leaving it dead code.
        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # NOTE: TopicPartition objects have no key()/value(); the original
        # printed them here and would raise AttributeError on assignment.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING
        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        # Use the configured timeout rather than a hard-coded 1 second.
        message = self.consumer.poll(self.consume_timeout)
        if message is None:
            return 0
        # Surface broker/consumer errors instead of treating them as data
        # (the original never inspected message.error()).
        if message.error() is not None:
            logger.warning("Error from consumer %s", message.error())
            return 0
        logger.debug("consumed message %s: %s", message.key(), message.value())
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        # Consumer has no flush(); close() commits offsets and leaves the group.
        self.consumer.close()
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use.

        topic_name_pattern -- topic (or pattern) to subscribe to
        message_handler    -- callable invoked with each received message
        is_avro            -- use AvroConsumer with a schema registry when True
        offset_earliest    -- rewind assigned partitions to the beginning
        sleep_secs         -- async sleep between empty poll cycles
        consume_timeout    -- per-poll timeout in seconds
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": BROKER_URL,
            "group.id": "0",
            "schema.registry.url": SCHEMA_REGISTRY_URL,
        }

        if is_avro is True:
            self.consumer = AvroConsumer(config=self.broker_properties)
        else:
            # Plain Consumer rejects the schema-registry key, so strip it.
            plain_config = {
                k: v
                for k, v in self.broker_properties.items()
                if k != "schema.registry.url"
            }
            self.consumer = Consumer(plain_config)

        self.consumer.subscribe([self.topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING
        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        try:
            message = self.consumer.poll(self.consume_timeout)
        except Exception as e:
            logger.debug(f"Poll exception {self.topic_name_pattern}: {e}")
            # Original fell through with `message` unbound here -> NameError.
            return 0

        if message is None:
            return 0
        if message.error() is not None:
            logger.error(
                f"Consumer error {self.topic_name_pattern}: {message.error()}")
            # Original implicitly returned None here, which breaks the
            # caller's `num_results > 0` comparison with a TypeError.
            return 0
        self.message_handler(message)
        logger.info(f"Message consumed {message.key()}: {message.value()}")
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        # Consumer has no flush(); close() commits offsets and leaves the group.
        if self.consumer is not None:
            self.consumer.close()