Example #1
    def test_consume_newest(self):
        topic = 'topic_newest'
        message = 'newest message'
        key = 'key'
        h_key = 'h_key'
        h_value = 'h_value'
        headers = [h_key + ':' + h_value]

        self._rpk.create_topic(topic)

        c = RpkConsumer(self._ctx, self.redpanda, topic, offset='newest')
        c.start()

        def cond():
            if c.error:
                raise c.error
            self._rpk.produce(topic, key, message, headers)
            return c.messages \
                and c.messages[0]['value'] == message \
                and c.messages[0]['key'] == key \
                and c.messages[0]['headers'] == [
                    {'key': h_key, 'value': h_value},
                ]

        wait_until(cond,
                   timeout_sec=150,
                   backoff_sec=30,
                   err_msg="Message didn't appear.")
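The tests above pass headers to produce as plain 'key:value' strings and then assert on them as a list of dicts. As a rough illustration of that mapping (a hypothetical helper, not part of RpkConsumer or rpk), splitting on the first colon yields the shape the assertion expects:

def headers_as_dicts(header_args):
    # Hypothetical helper: turn the 'key:value' strings passed to produce in
    # these tests into the [{'key': ..., 'value': ...}] shape that
    # c.messages[0]['headers'] is compared against. Splits on the first ':'.
    parsed = []
    for h in header_args:
        k, _, v = h.partition(':')
        parsed.append({'key': k, 'value': v})
    return parsed

assert headers_as_dicts(['h_key:h_value']) == [{'key': 'h_key', 'value': 'h_value'}]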
Example #2
    def test_consume_oldest(self):
        topic = 'topic'

        n = random.randint(10, 100)
        msgs = {}
        for i in range(n):
            msgs['key-' + str(i)] = 'message-' + str(i)

        # Produce messages
        for k in msgs:
            self._rpk.produce(topic, k, msgs[k])

        c = RpkConsumer(self._ctx, self.redpanda, topic)
        c.start()

        def cond():
            # Consume from the beginning
            if len(c.messages) != len(msgs):
                return False

            for m in c.messages:
                key = m['key']
                if key is None:
                    return False

                if m['message'] != msgs[key]:
                    return False

            return True

        wait_until(cond,
                   timeout_sec=30,
                   backoff_sec=8,
                   err_msg="Message didn't appear.")
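Every example here polls with ducktape's wait_until. Conceptually it behaves roughly like the sketch below (simplified; the real ducktape helper accepts more options and raises its own TimeoutError type):

import time

def wait_until_sketch(condition, timeout_sec, backoff_sec=1, err_msg=""):
    # Simplified model of ducktape's wait_until: call `condition` repeatedly,
    # sleeping `backoff_sec` between attempts, until it returns a truthy value
    # or `timeout_sec` elapses, then fail with `err_msg`.
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        if condition():
            return
        time.sleep(backoff_sec)
    raise TimeoutError(err_msg)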
Example #3
    def test_produce(self):
        topic = 'topic'
        message = 'message'
        key = 'key'
        h_key = 'h_key'
        h_value = 'h_value'
        headers = [h_key + ':' + h_value]

        self._rpk.create_topic(topic)
        self._rpk.produce(topic, key, message, headers)

        c = RpkConsumer(self._ctx, self.redpanda, topic)
        c.start()

        def cond():
            return c.messages is not None \
                and len(c.messages) == 1 \
                and c.messages[0]['value'] == message \
                and c.messages[0]['key'] == key \
                and c.messages[0]['headers'] == [
                    {'key': h_key, 'value': h_value},
                ]

        wait_until(cond,
                   timeout_sec=120,
                   backoff_sec=30,
                   err_msg="Message didn't appear.")
Example #4
    def test_consume_from_partition(self):
        topic = 'topic_partition'

        n_parts = random.randint(3, 100)
        self._rpk.create_topic(topic, partitions=n_parts)

        n = random.randint(10, 30)
        msgs = {}
        for i in range(n):
            msgs['key-' + str(i)] = 'message-' + str(i)

        part = random.randint(0, n_parts - 1)
        # Produce messages to a random partition
        for k in msgs:
            self._rpk.produce(topic, k, msgs[k], partition=part)

        # Consume from the beginning
        c = RpkConsumer(self._ctx,
                        self.redpanda,
                        topic,
                        offset='oldest',
                        partitions=[part])
        c.start()

        def cond():
            if len(c.messages) != len(msgs):
                return False

            for m in c.messages:
                key = m['key']
                if key is None:
                    return False

                if m['value'] != msgs[key]:
                    return False

            return True

        # timeout loop, but reset the timeout if we appear to be making progress
        retries = 10
        prev_msg_count = len(c.messages)
        while retries > 0:
            self.redpanda.logger.debug(
                f"Message count {len(c.messages)} retries {retries}")
            if cond():
                return
            if len(c.messages) > prev_msg_count:
                prev_msg_count = len(c.messages)
                retries = 10
            time.sleep(1)
            retries -= 1

        raise ducktape.errors.TimeoutError("Message didn't appear")
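The manual loop above is a progress-aware timeout: the retry budget is reset whenever the consumer's message count grows. The same idea can be factored into a small helper; this is a sketch, not something ducktape or the test suite provides:

import time

def wait_with_progress(cond, progress, retries=10, sleep_sec=1):
    # Poll `cond()`, giving up only after `retries` consecutive polls during
    # which `progress()` (e.g. a message count) has not increased.
    best = progress()
    budget = retries
    while budget > 0:
        if cond():
            return
        current = progress()
        if current > best:
            best = current
            budget = retries
        time.sleep(sleep_sec)
        budget -= 1
    raise TimeoutError("condition not met and no further progress observed")

With such a helper, the body of the test above would reduce to wait_with_progress(cond, lambda: len(c.messages)).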
Example #5
    def _consume_all(self, topic_names: list[str], msg_count_per_topic: int,
                     timeout_per_topic: int):
        """
        Don't do anything with the messages, just consume them to demonstrate
        that doing so does not exhaust redpanda resources.
        """
        def consumer_saw_msgs(consumer):
            self.logger.info(
                f"Consumer message_count={consumer.message_count} / {msg_count_per_topic}"
            )
            # Tolerate greater-than, because if there were errors during production
            # there can have been retries.
            return consumer.message_count >= msg_count_per_topic

        for tn in topic_names:
            consumer = RpkConsumer(self._ctx,
                                   self.redpanda,
                                   tn,
                                   save_msgs=False,
                                   fetch_max_bytes=BIG_FETCH,
                                   num_msgs=msg_count_per_topic)
            consumer.start()
            wait_until(lambda: consumer_saw_msgs(consumer),
                       timeout_sec=timeout_per_topic,
                       backoff_sec=5)
            consumer.stop()
            consumer.free()
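A call site for the helper above might look like the following. The topic names, message count, and timeout are illustrative values only, and the snippet is assumed to sit inside a test method of the same class:

        # Hypothetical usage of _consume_all; all values are illustrative.
        topic_names = [f"scale-topic-{i}" for i in range(3)]
        self._consume_all(topic_names,
                          msg_count_per_topic=10000,
                          timeout_per_topic=120)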
Example #6
    def test_many_clients(self):
        """
        Check that redpanda remains stable under higher numbers of clients
        than usual.
        """

        # This test requires dedicated system resources to run reliably.
        assert self.redpanda.dedicated_nodes

        # Scale tests are not run on debug builds
        assert not self.debug_mode

        PARTITION_COUNT = 100
        PRODUCER_COUNT = 4000
        TOPIC_NAME = "manyclients"
        RECORDS_PER_PRODUCER = 1000

        self.client().create_topic(
            TopicSpec(
                name=TOPIC_NAME,
                partition_count=PARTITION_COUNT,
                retention_bytes=10 * 1024 * 1024,
                segment_bytes=1024 * 1024 * 5,
            ))

        # Two consumers, just so that we are at least touching consumer
        # group functionality, if not stressing the overall number of consumers.
        consumer_a = RpkConsumer(self.test_context,
                                 self.redpanda,
                                 TOPIC_NAME,
                                 group="testgroup",
                                 save_msgs=False)
        consumer_b = RpkConsumer(self.test_context,
                                 self.redpanda,
                                 TOPIC_NAME,
                                 group="testgroup",
                                 save_msgs=False)

        producer = ProducerSwarm(self.test_context, self.redpanda, TOPIC_NAME,
                                 PRODUCER_COUNT, RECORDS_PER_PRODUCER)
        producer.start()
        consumer_a.start()
        consumer_b.start()

        producer.wait()

        def complete():
            expect = PRODUCER_COUNT * RECORDS_PER_PRODUCER
            self.logger.info(
                f"Message counts: {consumer_a.message_count} {consumer_b.message_count} (vs {expect})"
            )
            return consumer_a.message_count + consumer_b.message_count >= expect

        wait_until(complete,
                   timeout_sec=30,
                   backoff_sec=1,
                   err_msg="Consumers didn't see all messages")
Example #7
    def test_exceed_broker_limit(self):
        self.redpanda.set_cluster_config({"kafka_connections_max": 6})

        metrics = [
            MetricCheck(self.logger, self.redpanda, n, REJECTED_METRIC, {},
                        sum) for n in self.redpanda.nodes
        ]

        # I happen to know that an `rpk topic consume` occupies three
        # connections.  So after opening two consumers, I should find
        # that a producer cannot get in.
        consumers = [
            RpkConsumer(self.test_context, self.redpanda, self.topic),
            RpkConsumer(self.test_context, self.redpanda, self.topic),
        ]

        for c in consumers:
            c.start()

        producer = RpkProducer(self.test_context,
                               self.redpanda,
                               self.topic,
                               msg_size=16384,
                               msg_count=1,
                               produce_timeout=5)
        producer.start()
        try:
            producer.wait()
        except Exception:
            # This is a non-specific exception because ducktape re-raises in wait()
            # as a bare Exception
            pass
        else:
            raise RuntimeError("Producer should have failed")

        for c in consumers:
            c.stop()
            c.wait()

        assert any([
            m.evaluate([(REJECTED_METRIC, lambda a, b: b > a)])
            for m in metrics
        ])
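MetricCheck captures a baseline of the rejected-connections metric when it is constructed, and evaluate compares a fresh sample against it; the predicate lambda a, b: b > a asserts that the counter increased. A rough standalone sketch of that before/after pattern, assuming a read_metric() callable that is not part of the real helper:

class BeforeAfterCheck:
    # Toy model of the before/after pattern used by MetricCheck above:
    # record a metric value up front, then later test a predicate over
    # (baseline, current).
    def __init__(self, read_metric):
        self._read_metric = read_metric   # hypothetical accessor
        self._baseline = read_metric()    # value captured before the action

    def evaluate(self, predicate):
        return predicate(self._baseline, self._read_metric())

# Example (hypothetical):
#   check = BeforeAfterCheck(lambda: rejected_connection_count(node))
#   ... open more connections than kafka_connections_max allows ...
#   assert check.evaluate(lambda a, b: b > a)   # counter went up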
Example #8
    def test_consume_from_partition(self):
        topic = 'topic_partition'

        n_parts = random.randint(3, 100)
        self._rpk.create_topic(topic, partitions=n_parts)

        n = random.randint(10, 30)
        msgs = {}
        for i in range(n):
            msgs['key-' + str(i)] = 'message-' + str(i)

        part = random.randint(0, n_parts - 1)
        # Produce messages to a random partition
        for k in msgs:
            self._rpk.produce(topic, k, msgs[k], partition=part)

        # Consume from the beginning
        c = RpkConsumer(self._ctx,
                        self.redpanda,
                        topic,
                        offset='oldest',
                        partitions=[part])
        c.start()

        def cond():
            if len(c.messages) != len(msgs):
                return False

            for m in c.messages:
                key = m['key']
                if key is None:
                    return False

                if m['message'] != msgs[key]:
                    return False

            return True

        wait_until(cond,
                   timeout_sec=10,
                   backoff_sec=1,
                   err_msg="Message didn't appear.")
Example #9
    def _consume_and_count_bytes(self):
        consumer = RpkConsumer(self._ctx, self.redpanda, self.topic)
        consumer.start()

        self._bytes_received = 0

        def count_bytes():
            # Recount from scratch on each attempt so that retries don't
            # double-count messages already seen.
            self._bytes_received = 0

            for msg in consumer.messages:
                value = msg["value"]

                # Treat a None value as "not ready yet": return False so
                # wait_until retries.
                if value is None:
                    return False

                self._bytes_received += len(value)

            # Succeed as soon as any bytes have been fetched.
            return self._bytes_received > 0

        wait_until(count_bytes,
                   timeout_sec=30,
                   backoff_sec=5,
                   err_msg="count_bytes() failed")

        consumer.stop()
Example #10
    def test_kafka_streams(self):
        example = self.create_example()

        # This will raise TypeError if PRODUCER is undefined
        producer = self.PRODUCER(self._ctx, self.redpanda, self.topics[0].name)
        consumer = RpkConsumer(self._ctx, self.redpanda, self.topics[1].name)

        # Start the example
        example.start()

        # Produce some data
        producer.start()
        producer.wait()

        # Consume the data
        consumer.start()

        def try_cons():
            i = 0
            msgs = consumer.messages
            while i < len(msgs) and not self.is_valid_msg(msgs[i]):
                i += 1

            return i < len(msgs)

        wait_until(
            try_cons,
            timeout_sec=self._timeout,
            backoff_sec=5,
            err_msg=f"kafka-streams {self._ctx.cls_name} consumer failed")

        consumer.stop()
        producer.stop()
        example.stop()
Example #11
    def test_leadership_transfer(self):
        rpk = RpkTool(self.redpanda)
        topics = list(filter(lambda x: x.partition_count > 1, self.topics))
        group = "g0"

        producers = []
        for topic in topics:
            producer = RpkProducer(self._ctx,
                                   self.redpanda,
                                   topic.name,
                                   msg_size=5,
                                   msg_count=1000)
            producer.start()
            producers.append(producer)

        consumers = []
        for topic in topics:
            consumer = RpkConsumer(self._ctx,
                                   self.redpanda,
                                   topic.name,
                                   group=group)
            consumer.start()
            consumers.append(consumer)

        # Wait until cluster starts producing metrics
        wait_until(
            lambda: self.redpanda.metrics_sample("kafka_group_offset") is not None,
            timeout_sec=30,
            backoff_sec=5)

        admin = Admin(redpanda=self.redpanda)

        def get_offset_with_node_from_metric(group):
            metric = self.redpanda.metrics_sample("kafka_group_offset")
            if metric is None:
                return None
            metric = metric.label_filter(dict(group=group))
            return metric.samples

        def get_group_leader():
            return admin.get_partitions(namespace="kafka_internal",
                                        topic="group",
                                        partition=0)['leader_id']

        def check_metric_from_node(node):
            metrics_offsets = get_offset_with_node_from_metric(group)
            if metrics_offsets is None:
                return False
            return all([
                metric.node.account.hostname == node.account.hostname
                for metric in metrics_offsets
            ])

        def transfer_completed(new_leader_node):
            return self.redpanda.nodes[get_group_leader() - 1].account.hostname \
                    == new_leader_node.account.hostname

        leader_node = self.redpanda.nodes[get_group_leader() - 1]
        wait_until(lambda: check_metric_from_node(leader_node),
                   timeout_sec=30,
                   backoff_sec=5)

        # Check transfer leadership to another node
        for i in range(3):
            new_leader_node = random.choice(
                list(filter(lambda x: x != leader_node, self.redpanda.nodes)))

            admin.transfer_leadership_to(
                namespace="kafka_internal",
                topic="group",
                partition=0,
                target=self.redpanda.idx(new_leader_node))

            wait_until(lambda: transfer_completed(new_leader_node) and
                       check_metric_from_node(new_leader_node),
                       timeout_sec=30,
                       backoff_sec=5)

            leader_node = new_leader_node

        # Check transfer leadership to same node
        admin.transfer_leadership_to(namespace="kafka_internal",
                                     topic="group",
                                     partition=0,
                                     target=self.redpanda.idx(leader_node))

        wait_until(lambda: transfer_completed(leader_node) and
                   check_metric_from_node(leader_node),
                   timeout_sec=30,
                   backoff_sec=5)

        for host in producers + consumers:
            host.stop()
            host.free()
Example #12
    def test_leadership_transfer(self):
        topics = list(filter(lambda x: x.partition_count > 1, self.topics))
        group = "g0"

        producers = []
        for topic in topics:
            producer = RpkProducer(self._ctx,
                                   self.redpanda,
                                   topic.name,
                                   msg_size=5,
                                   msg_count=1000)
            producer.start()
            producers.append(producer)

        consumers = []
        for topic in topics:
            consumer = RpkConsumer(self._ctx,
                                   self.redpanda,
                                   topic.name,
                                   group=group)
            consumer.start()
            consumers.append(consumer)

        # Wait until cluster starts producing metrics
        wait_until(
            lambda: self.redpanda.metrics_sample("kafka_group_offset") is not None,
            timeout_sec=30,
            backoff_sec=5)

        admin = Admin(redpanda=self.redpanda)

        def get_group_partition():
            return admin.get_partitions(namespace="kafka",
                                        topic="__consumer_offsets",
                                        partition=0)

        def get_group_leader():
            return get_group_partition()['leader_id']

        def metrics_from_single_node(node):
            """
            Check that metrics are produced only by the given node.
            """
            metrics = self.redpanda.metrics_sample("kafka_group_offset")
            if not metrics:
                self.logger.debug("No metrics found")
                return False
            metrics = metrics.label_filter(dict(group=group)).samples
            for metric in metrics:
                self.logger.debug(
                    f"Retrieved metric from node={metric.node.account.hostname}: {metric}"
                )
            return all([
                metric.node.account.hostname == node.account.hostname
                for metric in metrics
            ])

        def transfer_leadership(new_leader):
            """
            Request leadership transfer of the internal consumer group partition
            and check that it completes successfully.
            """
            self.logger.debug(
                f"Transferring leadership to {new_leader.account.hostname}")
            admin.transfer_leadership_to(namespace="kafka",
                                         topic="__consumer_offsets",
                                         partition=0,
                                         target=self.redpanda.idx(new_leader))
            for _ in range(3):  # re-check a few times
                leader = get_group_leader()
                self.logger.debug(f"Current leader: {leader}")
                if leader != -1 and self.redpanda.get_node(
                        leader) == new_leader:
                    return True
                time.sleep(1)
            return False

        def partition_ready():
            """
            All replicas present and known leader
            """
            partition = get_group_partition()
            self.logger.debug(f"XXXXX: {partition}")
            return len(
                partition['replicas']) == 3 and partition['leader_id'] >= 0

        def select_next_leader():
            """
            Select a leader different than the current leader
            """
            wait_until(partition_ready, timeout_sec=30, backoff_sec=5)
            partition = get_group_partition()
            replicas = partition['replicas']
            assert len(replicas) == 3
            leader = partition['leader_id']
            assert leader >= 0
            replicas = filter(lambda r: r["node_id"] != leader, replicas)
            new_leader = random.choice(list(replicas))['node_id']
            return self.redpanda.get_node(new_leader)

        # repeat the following test a few times.
        #
        #  1. transfer leadership to a new node
        #  2. check that new leader reports metrics
        #  3. check that prev leader does not report
        #
        # Note that "does not report" here means the previous leader stops
        # reporting metrics for consumer groups it no longer leads, not that
        # it stops reporting metrics entirely.
        for _ in range(4):
            new_leader = select_next_leader()

            wait_until(lambda: transfer_leadership(new_leader),
                       timeout_sec=30,
                       backoff_sec=5)

            wait_until(lambda: metrics_from_single_node(new_leader),
                       timeout_sec=30,
                       backoff_sec=5)

        for host in producers + consumers:
            host.stop()
            host.free()
Example #13
    def test_consumer_group_mirroring(self, source_type):
        # start redpanda
        self.start_brokers(source_type=source_type)
        consumer_group = "test-group-1"
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda,
                                         consumer_group_pattern=consumer_group,
                                         log_level="TRACE")
        self.mirror_maker.start()

        msg_size = 512
        msg_cnt = 1000000 if self.redpanda.dedicated_nodes else 100

        # produce some messages to source redpanda
        producer = RpkProducer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               msg_size,
                               msg_cnt,
                               acks=-1)

        producer.start()
        producer.wait()
        producer.free()

        # consume some messages from source redpanda
        consumer = RpkConsumer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               ignore_errors=False,
                               retries=3,
                               group=consumer_group,
                               save_msgs=False,
                               num_msgs=int(msg_cnt / 5))

        consumer.start()
        consumer.wait()
        consumer.stop()
        source_messages = consumer.messages
        self.logger.info(f"source message count: {len(source_messages)}")
        consumer.free()

        src_rpk = RpkTool(self.source_broker)
        source_group = src_rpk.group_describe(consumer_group)
        target_rpk = RpkTool(self.redpanda)

        def target_group_equal():
            try:
                target_group = target_rpk.group_describe(consumer_group)
            except RpkException as e:
                # e.g. COORDINATOR_NOT_AVAILABLE
                self.logger.info(f"Error describing target cluster group: {e}")
                return False

            self.logger.info(
                f"source {source_group}, target_group: {target_group}")
            return target_group.partitions == source_group.partitions and target_group.name == source_group.name

        # wait for consumer group sync
        timeout = 600 if self.redpanda.dedicated_nodes else 60
        wait_until(target_group_equal, timeout_sec=timeout, backoff_sec=5)

        self.mirror_maker.stop()
Example #14
    def test_static(self):
        """
        Move partitions with data, but no active producers or consumers.
        """
        self.logger.info(f"Starting redpanda...")
        self.start_redpanda(num_nodes=3)

        topics = []
        for partition_count in range(1, 5):
            for replication_factor in (1, 3):
                name = f"topic{len(topics)}"
                spec = TopicSpec(name=name,
                                 partition_count=partition_count,
                                 replication_factor=replication_factor)
                topics.append(spec)

        self.logger.info(f"Creating topics...")
        for spec in topics:
            self.client().create_topic(spec)

        num_records = 1000
        produced = set(
            ((f"key-{i:08d}", f"record-{i:08d}") for i in range(num_records)))

        for spec in topics:
            self.logger.info(f"Producing to {spec}")
            producer = KafProducer(self.test_context, self.redpanda, spec.name,
                                   num_records)
            producer.start()
            self.logger.info(
                f"Finished producing to {spec}, waiting for producer...")
            producer.wait()
            producer.free()
            self.logger.info(f"Producer stop complete.")

        for _ in range(25):
            self._move_and_verify()

        for spec in topics:
            self.logger.info(f"Verifying records in {spec}")

            consumer = RpkConsumer(self.test_context,
                                   self.redpanda,
                                   spec.name,
                                   ignore_errors=False,
                                   retries=0)
            consumer.start()
            timeout = 30
            t1 = time.time()
            consumed = set()
            while consumed != produced:
                if time.time() > t1 + timeout:
                    self.logger.error(
                        f"Validation failed for topic {spec.name}.  Produced {len(produced)}, consumed {len(consumed)}"
                    )
                    self.logger.error(
                        f"Messages consumed but not produced: {sorted(consumed - produced)}"
                    )
                    self.logger.error(
                        f"Messages produced but not consumed: {sorted(produced - consumed)}"
                    )
                    assert set(consumed) == produced
                else:
                    time.sleep(5)
                    for m in consumer.messages:
                        self.logger.info(f"message: {m}")
                    consumed = set([(m['key'], m['value'])
                                    for m in consumer.messages])

            self.logger.info(f"Stopping consumer...")
            consumer.stop()
            self.logger.info(f"Awaiting consumer...")
            consumer.wait()
            self.logger.info(f"Freeing consumer...")
            consumer.free()

            self.logger.info(f"Finished verifying records in {spec}")