def test_any_method_after_close_throws_exception():
    """Every Consumer method must raise RuntimeError once close() has run.

    A consumer is created, subscribed, unsubscribed and closed. Each API
    call made afterwards is expected to fail with a RuntimeError whose
    message matches 'Consumer closed'.
    """
    c = Consumer({
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    })

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # Each entry defers the call (and the construction of its arguments)
    # until it is invoked inside the pytest.raises block below.
    calls_after_close = [
        lambda: c.subscribe(['test']),
        lambda: c.unsubscribe(),
        lambda: c.poll(),
        lambda: c.consume(),
        lambda: c.assign([TopicPartition('test', 0)]),
        lambda: c.unassign(),
        lambda: c.assignment(),
        lambda: c.commit(),
        lambda: c.committed([TopicPartition("test", 0)]),
        lambda: c.position([TopicPartition("test", 0)]),
        lambda: c.seek([TopicPartition("test", 0, 0)]),
        lambda: c.get_watermark_offsets(TopicPartition("test", 0)),
    ]

    for invoke in calls_after_close:
        with pytest.raises(RuntimeError) as ex:
            invoke()
        assert ex.match('Consumer closed')
def test_any_method_after_close_throws_exception():
    """Check that a closed Consumer rejects every method call.

    After close(), each consumer method is expected to raise a
    RuntimeError whose message is exactly 'Consumer closed'.
    """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    c = Consumer(settings)

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    def expect_closed(call):
        # Run `call` and require the exact "closed" RuntimeError.
        with pytest.raises(RuntimeError) as ex:
            call()
        assert 'Consumer closed' == str(ex.value)

    expect_closed(lambda: c.subscribe(['test']))
    expect_closed(lambda: c.unsubscribe())
    expect_closed(lambda: c.poll())
    expect_closed(lambda: c.consume())
    expect_closed(lambda: c.assign([TopicPartition('test', 0)]))
    expect_closed(lambda: c.unassign())
    expect_closed(lambda: c.assignment())
    expect_closed(lambda: c.commit())
    expect_closed(lambda: c.committed([TopicPartition("test", 0)]))
    expect_closed(lambda: c.position([TopicPartition("test", 0)]))
    expect_closed(lambda: c.seek([TopicPartition("test", 0, 0)]))
    expect_closed(lambda: c.get_watermark_offsets(TopicPartition("test", 0)))
def test_basic_api():
    """Basic API tests; these won't really do anything since there is no
    broker configured.

    The calls exercise argument handling and the client-side timeout and
    error paths rather than real message consumption.
    """
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # `async` is a reserved word from Python 3.7 on, so the keyword argument
    # must be spelled `asynchronous` for this file to even parse.
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    # Without a broker the lookup may time out; that is the only accepted error.
    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    # Bumped once per instance; used as a suffix so every consumer gets its
    # own consumer group id.
    consumer_group_counter = 0

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use

        Args:
            topic_name_pattern: Topic name (or pattern) passed to subscribe().
            message_handler: Callable invoked with each successfully
                received message.
            is_avro: When True an AvroConsumer is created, otherwise a
                plain Consumer.
            offset_earliest: When True, on_assign() rewinds every assigned
                partition to the beginning.
            sleep_secs: Pause between polling bursts in consume().
            consume_timeout: Timeout handed to each poll() call.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        KafkaConsumer.consumer_group_counter = KafkaConsumer.consumer_group_counter + 1

        # NOTE(review): the original built this dict in an if/else on
        # offset_earliest whose two branches were identical; they are merged
        # here without changing behaviour. Confirm whether the non-earliest
        # case was meant to use a different "auto.offset.reset".
        self.broker_properties = {
            CTAConstants.MAP_KEY_BOOTSTRAP_SERVERS: CTAConstants.BOOTSTRAP_SERVERS,
            "group.id": f"{CTAConstants.CONSUMER_GRP_ID_PRFX}-{KafkaConsumer.consumer_group_counter}",
            "default.topic.config": {"auto.offset.reset": "earliest"},
        }

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties["schema.registry.url"] = CTAConstants.SCHEMA_REGISTRY_HOST
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        # on_assign is registered so partition offsets can be adjusted at
        # assignment time.
        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)
        logger.info(f"Instantiated consumer and subscribed: ({self.topic_name_pattern})")

    # Called back on assign of partition(s) to this Consumer.
    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        logger.info("on_assign")
        # If the topic is configured to use `offset_earliest`, set every
        # partition offset to the beginning before assigning.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            # Drain messages back-to-back, then yield for sleep_secs.
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message.

        Returns:
            1 if a message was received and handed to message_handler,
            0 otherwise (poll timeout, error event, or RuntimeError).
        """
        logger.debug(f"In _consume({self.topic_name_pattern})")
        try:
            msg = self.consumer.poll(timeout=self.consume_timeout)
            if msg is None:
                logger.debug("No msg in topic yet.")
                return 0

            error = msg.error()
            if error is not None:
                logger.error(
                    f"Error in consumer:{self.topic_name_pattern} while consuming msgs. Err code: {error.code()}, error-name: {error.name()}, error.str:{error.str()}"
                )
                # An error event is not a processed message; returning 0
                # lets consume() back off instead of re-polling at once.
                return 0

            logger.debug("Got msg.")
            self.message_handler(msg)
            return 1
        except RuntimeError as err:
            # Python 3 exceptions have no `.message`; format the exception
            # itself. Return 0 so the `num_results > 0` comparison in
            # consume() never sees None.
            logger.error(f"Runtime error in consumer:{self.topic_name_pattern}. Err msg: {err}")
            return 0

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unassign()
        self.consumer.unsubscribe()
        # Actually close the underlying client so it is not left open.
        self.consumer.close()
def test_basic_api():
    """Basic API tests; these won't really do anything since there is no
    broker configured.

    The calls exercise argument validation and the client-side timeout and
    error paths rather than real message consumption.
    """
    # Constructing a Consumer without a config must fail; pytest.raises
    # makes the test fail if no exception is raised at all.
    with pytest.raises(TypeError) as ex:
        kc = Consumer()
    assert str(ex.value) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(
        map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0],
                                          timeout=0.5,
                                          cached=False)
    except KafkaException as e:
        # The failure message indexes e.args; calling it would itself raise
        # TypeError and hide the real error code.
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions
                if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    kc.close()
def test_basic_api():
    """Basic API tests; these won't really do anything since there is no
    broker configured.

    The calls exercise argument handling and the client-side timeout and
    error paths rather than real message consumption.
    """
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # `async` is a reserved word from Python 3.7 on, so the keyword argument
    # must be spelled `asynchronous` for this file to even parse.
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    # Without a broker the lookup may time out; that is the only accepted error.
    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use

        Args:
            topic_name_pattern: Topic name (or pattern) to subscribe to;
                also used as the consumer group id.
            message_handler: Callable invoked with each successfully
                received message.
            is_avro: When True an AvroConsumer is created, otherwise a
                plain Consumer.
            offset_earliest: When True, on_assign() rewinds every assigned
                partition to the beginning.
            sleep_secs: Pause between polling bursts in consume().
            consume_timeout: Timeout handed to each poll() call.
        """
        # The docstring must come first; a statement placed before it turns
        # it into a discarded string expression.
        print("Init for topic", topic_name_pattern)
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "localhost:9092",
            "group.id": self.topic_name_pattern,
            'auto.offset.reset': 'earliest'
        }

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        # on_assign is registered so partition offsets can be adjusted at
        # assignment time.
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
        print(f"Init complete for:{self.topic_name_pattern}")

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # Imported locally so this block does not depend on the file's
        # top-level import list.
        from confluent_kafka import OFFSET_BEGINNING

        print("on_assign is Running")
        for partition in partitions:
            if self.offset_earliest:
                # Use the logical "beginning" sentinel rather than the
                # absolute offset 0, which may no longer exist in the log.
                partition.offset = OFFSET_BEGINNING
            print("partition", partition)

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            # Drain messages back-to-back, then yield for sleep_secs.
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message.

        Returns:
            1 if a message was received and handed to message_handler,
            0 otherwise (poll timeout, error event, or failed
            deserialization).
        """
        try:
            # Honour the timeout configured in __init__ instead of a
            # hard-coded value.
            message = self.consumer.poll(self.consume_timeout)
        except SerializerError as e:
            # No message object exists when deserialization fails, so only
            # the exception can be reported.
            print("Message deserialization failed: {}".format(e))
            return 0

        print(f"message in _consume (): {message}")
        if message is None:
            return 0
        elif message.error() is not None:
            # A %s placeholder is required for logging to render the error.
            logger.error("Error caused due to: %s", message.error())
            return 0
        else:
            print("message_handler called()")
            self.message_handler(message)
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unassign()
        self.consumer.unsubscribe()
        self.consumer.close()
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use

        Args:
            topic_name_pattern: Topic name (or pattern) to subscribe to.
            message_handler: Callable invoked with each successfully
                received message.
            is_avro: When True an AvroConsumer is created, otherwise a
                plain Consumer.
            offset_earliest: When True, on_assign() rewinds every assigned
                partition to the beginning.
            sleep_secs: Pause between polling bursts in consume().
            consume_timeout: Timeout handed to each poll() call.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        # Lookup table of endpoints; these are NOT client config keys.
        self.broker_properties = {
            "BROKER_URL": "PLAINTEXT://localhost:9092",
            "SCHEMA_REGISTRY_URL": "http://localhost:8081"
        }

        # The configuration actually handed to the Kafka client.
        consumer_config = {
            "bootstrap.servers": self.broker_properties["BROKER_URL"],
            "group.id": "0"
        }

        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            # The registry URL belongs in the client config; the
            # `schema_registry` keyword expects a registry client object,
            # not a URL string.
            consumer_config["schema.registry.url"] = self.broker_properties[
                "schema.registry.url"]
            self.consumer = AvroConsumer(consumer_config)
        else:
            self.consumer = Consumer(consumer_config)

        # Subscribe once, at construction time, with the bound callback.
        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # If the topic is configured to use `offset_earliest`, set every
        # partition offset to the beginning before assigning.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            # Drain messages back-to-back, then yield for sleep_secs.
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message.

        Returns:
            1 if a message was received and handed to message_handler,
            0 otherwise (poll timeout or error event).
        """
        message = self.consumer.poll(self.consume_timeout)
        if message is None:
            return 0
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
            return 0
        else:
            self.message_handler(message)
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.commit()
        self.consumer.unassign()
        self.consumer.unsubscribe()
        self.consumer.close()
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use

        Args:
            topic_name_pattern: Topic name (or pattern) to subscribe to;
                also embedded in the consumer group id.
            message_handler: Callable invoked with each valid message.
            is_avro: When True an AvroConsumer is created, otherwise a
                plain Consumer.
            offset_earliest: When True, on_assign() rewinds every assigned
                partition to the beginning.
            sleep_secs: Pause between polling bursts in consume().
            consume_timeout: Timeout handed to each poll() call.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9092",
            "group.id": f"consumer-group-{self.topic_name_pattern}",
            "auto.offset.reset": "earliest"
        }

        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # Rewind every partition only when the consumer was configured to
        # start from the earliest offset.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            # Drain messages back-to-back, then yield for sleep_secs.
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message.

        Returns:
            1 if a valid message was received and handed to
            message_handler, 0 otherwise.
        """
        # Honour the timeout configured in __init__ instead of a hard-coded
        # value.
        message = self.consumer.poll(self.consume_timeout)
        if self._invalid_message(message):
            return 0

        logger.debug(
            f"Message consumed: {message.key()}: {message.value()}")
        self.message_handler(message)
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        if self.consumer is not None:
            self.consumer.unassign()
            self.consumer.unsubscribe()
            # Actually close the underlying client so it is not left open.
            self.consumer.close()

    def _invalid_message(self, message):
        """Check the validity of a given message

        Returns True when `message` is None or carries an error, False
        otherwise.
        """
        if message is None:
            return True
        if message.error() is not None:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning(f"Error consuming message: {message.error()}")
            return True
        return False
def test_basic_api():
    """Basic API tests; these won't really do anything since there is no
    broker configured.

    The calls exercise argument validation and the client-side timeout and
    error paths rather than real message consumption.
    """
    # Constructing a Consumer without a config must fail; pytest.raises
    # makes the test fail if no exception is raised at all.
    with pytest.raises(TypeError) as ex:
        kc = Consumer()
    assert str(ex.value) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        # The failure message indexes e.args; calling it would itself raise
        # TypeError and hide the real error code.
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()