def test_kafka_mini_dataset_size():
    """Test the functionality of batch.num.messages property of
    KafkaBatchIODataset/KafkaGroupIODataset.
    """
    import tensorflow_io.kafka as kafka_io

    # Produce a large backlog of messages for the batch dataset to consume.
    for idx in range(200, 10000):
        kafka_io.write_kafka(message="D{}".format(idx), topic="key-partition-test")

    BATCH_NUM_MESSAGES = 5000
    dataset = tfio.experimental.streaming.KafkaBatchIODataset(
        topics=["key-partition-test"],
        group_id="cgminibatchsize",
        servers=None,
        stream_timeout=5000,
        configuration=[
            "session.timeout.ms=7000",
            "max.poll.interval.ms=8000",
            "auto.offset.reset=earliest",
            "batch.num.messages={}".format(BATCH_NUM_MESSAGES),
        ],
    )

    # Only the first mini-batch is inspected: it must hold exactly
    # batch.num.messages records.
    for mini_batch in dataset:
        size = sum(1 for _ in mini_batch)
        assert size == BATCH_NUM_MESSAGES
        break
def test_kafka_group_io_dataset_resume_primary_cg_new_topic():
    """Test the functionality of the KafkaGroupIODataset when the consumer
    group is yet to catch up with the newly added messages only (instead of
    reading from the beginning) from the new topic.
    """
    import tensorflow_io.kafka as kafka_io

    # Publish 90 fresh messages to the topic.
    for idx in range(10, 100):
        kafka_io.write_kafka(message=f"D{idx}", topic="key-test")

    # The consumer group has already committed earlier offsets, so only the
    # newly sent 90 messages should come back.
    dataset = tfio.experimental.streaming.KafkaGroupIODataset(
        topics=["key-test"],
        group_id="cgtestprimary",
        servers="localhost:9092",
        configuration=["session.timeout.ms=7000", "max.poll.interval.ms=8000"],
    )
    received = sorted(key.numpy() for (key, _) in dataset)
    expected = sorted(("D" + str(idx)).encode() for idx in range(10, 100))
    assert np.all(received == expected)
def test_write_kafka(self):
    """test_write_kafka"""
    tf.compat.v1.disable_eager_execution()
    import tensorflow_io.kafka as kafka_io  # pylint: disable=wrong-import-position

    channel = "e{}e".format(time.time())

    def _consume_and_check(ds):
        # Drive `ds` through a TF1 initializable iterator: expect the five
        # records e(time)e0..e(time)e4, then OutOfRangeError at EOF.
        it = ds.make_initializable_iterator()
        init_op = it.initializer
        next_op = it.get_next()
        with self.cached_session() as sess:
            sess.run(init_op)
            for idx in range(5):
                self.assertEqual((channel + str(idx)).encode(), sess.run(next_op))
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run(next_op)

    # Start with reading test topic, replace `D` with `e(time)e`,
    # and write to test_e(time)e` topic.
    source = kafka_io.KafkaDataset(topics=["test:0:0:4"], group="test", eof=True)
    source = source.map(
        lambda x: kafka_io.write_kafka(
            tf.strings.regex_replace(x, "D", channel), topic="test_" + channel
        )
    )
    _consume_and_check(source)

    # Reading from `test_e(time)e` we should get the same result
    echoed = kafka_io.KafkaDataset(topics=["test_" + channel], group="test", eof=True)
    _consume_and_check(echoed)
def write_messages_background():
    """Write 100 new messages (D100..D199) to `key-partition-test` after a delay.

    Intended to run in a background thread so that a consumer started
    concurrently observes messages arriving while it is still polling.
    """
    # Import at function scope, consistent with the sibling tests in this
    # file; without it this function raises NameError on `kafka_io` unless a
    # module-level import exists elsewhere.
    import tensorflow_io.kafka as kafka_io

    # Give the consumer time to start polling before new data arrives.
    time.sleep(6)
    for i in range(100, 200):
        message = "D{}".format(i)
        kafka_io.write_kafka(message=message, topic="key-partition-test")