Example #1
def test_kafka_mini_dataset_size():
    """Test the functionality of batch.num.messages property of
    KafkaBatchIODataset/KafkaGroupIODataset.
    """
    import tensorflow_io as tfio
    import tensorflow_io.kafka as kafka_io

    # Write new messages to the topic
    for i in range(200, 10000):
        message = "D{}".format(i)
        kafka_io.write_kafka(message=message, topic="key-partition-test")

    BATCH_NUM_MESSAGES = 5000
    dataset = tfio.experimental.streaming.KafkaBatchIODataset(
        topics=["key-partition-test"],
        group_id="cgminibatchsize",
        servers=None,
        stream_timeout=5000,
        configuration=[
            "session.timeout.ms=7000",
            "max.poll.interval.ms=8000",
            "auto.offset.reset=earliest",
            "batch.num.messages={}".format(BATCH_NUM_MESSAGES),
        ],
    )
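    # Only the first mini-batch needs to be checked, hence the break below;
    # it should contain exactly BATCH_NUM_MESSAGES messages.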
    for mini_d in dataset:
        count = 0
        for _ in mini_d:
            count += 1
        assert count == BATCH_NUM_MESSAGES
        break
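Each element yielded by KafkaBatchIODataset is itself a tf.data.Dataset of (message, key) pairs, so every mini-batch can be transformed and consumed on its own. A minimal sketch of that per-batch handling (the numeric decoding of the "D<n>" payloads here is an illustrative assumption, not part of the test above):

import tensorflow as tf

for mini_d in dataset:
    # Illustrative: strip the "D" prefix and parse the rest as a float.
    features = mini_d.map(
        lambda message, key: tf.strings.to_number(
            tf.strings.regex_replace(message, "D", ""), tf.float32
        )
    ).batch(32)
    # `features` could now be fed to e.g. model.fit(features) per mini-batch.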
Example #2
def test_kafka_group_io_dataset_resume_primary_cg_new_topic():
    """Test the functionality of the KafkaGroupIODataset when the
    consumer group is yet to catch up with the newly added messages only
    (Instead of reading from the beginning) from the new topic.
    """
    import numpy as np

    import tensorflow_io as tfio
    import tensorflow_io.kafka as kafka_io

    # Write new messages to the topic
    for i in range(10, 100):
        message = f"D{i}"
        kafka_io.write_kafka(message=message, topic="key-test")
    # Read only the newly sent 90 messages
    dataset = tfio.experimental.streaming.KafkaGroupIODataset(
        topics=["key-test"],
        group_id="cgtestprimary",
        servers="localhost:9092",
        configuration=["session.timeout.ms=7000", "max.poll.interval.ms=8000"],
    )
    assert np.all(
        sorted(k.numpy() for (k, _) in dataset) == sorted(
            ("D" + str(i)).encode() for i in range(10, 100)))
Example #3
    def test_write_kafka(self):
        """test_write_kafka"""
        tf.compat.v1.disable_eager_execution()

        import tensorflow_io.kafka as kafka_io  # pylint: disable=wrong-import-position

        channel = "e{}e".format(time.time())

        # Start by reading the test topic, replace `D` with `e(time)e`,
        # and write the result to the `test_e(time)e` topic.
        dataset = kafka_io.KafkaDataset(topics=["test:0:0:4"],
                                        group="test",
                                        eof=True)
        dataset = dataset.map(lambda x: kafka_io.write_kafka(
            tf.strings.regex_replace(x, "D", channel), topic="test_" + channel)
                              )
        iterator = dataset.make_initializable_iterator()
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            # Basic test: read from topic 0.
            sess.run(init_op)
            for i in range(5):
                self.assertEqual((channel + str(i)).encode(),
                                 sess.run(get_next))
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run(get_next)

        # Reading back from the `test_e(time)e` topic should yield the same result
        dataset = kafka_io.KafkaDataset(topics=["test_" + channel],
                                        group="test",
                                        eof=True)
        iterator = dataset.make_initializable_iterator()
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            sess.run(init_op)
            for i in range(5):
                self.assertEqual((channel + str(i)).encode(),
                                 sess.run(get_next))
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run(get_next)
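The read at the top of this test assumes the test topic was already seeded with the messages D0 through D4. A hypothetical seeding step, run before eager execution is disabled (the loop below is an assumption, not part of the original suite):

# Hypothetical seeding: populate offsets 0-4 of the `test` topic.
for i in range(5):
    kafka_io.write_kafka(message="D{}".format(i), topic="test")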
Example #4
def write_messages_background():
    # Write new messages to the topic in a background thread
    time.sleep(6)
    for i in range(100, 200):
        message = "D{}".format(i)
        kafka_io.write_kafka(message=message, topic="key-partition-test")
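On its own the helper above does nothing useful; it is meant to run concurrently with a streaming reader so that new messages arrive while the reader's stream_timeout window is still open. A minimal driving sketch (the thread setup and the group id cgtestbackground are assumptions, not part of the original test):

import threading

import tensorflow_io as tfio

# Start the delayed writer; it sleeps 6s, then produces D100..D199.
thread = threading.Thread(target=write_messages_background)
thread.daemon = True
thread.start()

# Keep polling long enough for the background writes to be picked up.
dataset = tfio.experimental.streaming.KafkaGroupIODataset(
    topics=["key-partition-test"],
    group_id="cgtestbackground",
    servers="localhost:9092",
    stream_timeout=30000,
    configuration=["session.timeout.ms=7000", "max.poll.interval.ms=8000"],
)
for message, _key in dataset:
    pass  # Consume until the stream times out.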