Example #1
    def test_simple_end_to_end(self, source_type):
        # start brokers
        self.start_brokers(source_type=source_type)
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda)
        topics = []
        for i in range(0, 10):
            topics.append(
                TopicSpec(partition_count=random.randint(1, 10),
                          retention_bytes=random.randint(100000000, 300000000),
                          retention_ms=random.randint(1 * 3600000,
                                                      10 * 3600000)))
        self.source_client.create_topic(topics)
        self.mirror_maker.start()
        # start source producer & target consumer
        self.start_workload()

        self.run_validation(consumer_timeout_sec=120)
        self.mirror_maker.stop()
        target_client = DefaultClient(self.redpanda)
        for t in topics:
            desc = target_client.describe_topic(t.name)
            self.logger.debug(f'source topic: {t}, target topic: {desc}')
            assert len(desc.partitions) == t.partition_count
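
Describing the mirrored topics immediately after stopping MirrorMaker can race with replication still in flight; a minimal sketch of a safer check, reusing the `wait_until`, `DefaultClient`, and `TopicSpec` helpers from this example (the function name is mine, not from the original):

def wait_for_mirrored_topics(redpanda, topics, timeout_sec=120):
    # poll the target cluster until every source topic exists there
    # with the expected partition count
    client = DefaultClient(redpanda)

    def _all_mirrored():
        for t in topics:
            try:
                desc = client.describe_topic(t.name)
            except Exception:
                return False  # topic not yet created on the target
            if len(desc.partitions) != t.partition_count:
                return False
        return True

    wait_until(_all_mirrored, timeout_sec=timeout_sec, backoff_sec=2)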
Example #2
    def test_availability_when_one_node_failed(self):
        self.redpanda = RedpandaService(
            self.test_context,
            3,
            extra_rp_conf={
                "enable_auto_rebalance_on_node_add": True,
                "group_topic_partitions": 1,
                "default_topic_replications": 3,
            })

        self.redpanda.start()
        spec = TopicSpec(name="test-topic",
                         partition_count=6,
                         replication_factor=3)

        DefaultClient(self.redpanda).create_topic(spec)
        self.topic = spec.name

        self.start_producer(1, throughput=10000)
        self.start_consumer(1)
        self.await_startup()
        # start failure injector with default parameters
        self.start_finjector()

        self.validate_records()
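
`start_finjector()` is not shown in this listing; a minimal sketch of what explicit failure injection can look like, built only from the `FailureSpec` and `FailureInjector` calls that appear in the later examples (the loop shape and constants are assumptions):

import random
import time

def inject_random_failures(redpanda, rounds=5, pause_sec=10):
    # each round: inject a random failure type on a random node, then wait
    injector = FailureInjector(redpanda)
    for _ in range(rounds):
        spec = FailureSpec(type=random.choice(FailureSpec.FAILURE_TYPES),
                           node=random.choice(redpanda.nodes),
                           length=5.0)
        injector.inject_failure(spec)
        time.sleep(pause_sec)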
Example #3
    def test_adding_nodes_to_cluster(self):
        self.redpanda = RedpandaService(
            self.test_context, 3, extra_rp_conf={"group_topic_partitions": 1})
        # start single node cluster
        self.redpanda.start(nodes=[self.redpanda.nodes[0]])
        # create some topics
        topics = []
        # include __consumer_offsets topic replica
        total_replicas = 1
        for partition_count in range(1, 5):
            name = f"topic{len(topics)}"
            spec = TopicSpec(name=name,
                             partition_count=partition_count,
                             replication_factor=1)
            total_replicas += partition_count
            topics.append(spec)

        for spec in topics:
            DefaultClient(self.redpanda).create_topic(spec)
            self.topic = spec.name

        self.start_producer(1)
        self.start_consumer(1)
        self.await_startup()
        # add second node
        self.redpanda.start_node(self.redpanda.nodes[1])
        kafkacat = KafkaCat(self.redpanda)

        def _replicas_per_node():
            node_replicas = {}
            md = kafkacat.metadata()
            self.redpanda.logger.info(f"metadata: {md}")
            for topic in md['topics']:
                for p in topic['partitions']:
                    for r in p['replicas']:
                        node_id = r['id']
                        node_replicas[node_id] = node_replicas.get(node_id, 0) + 1

            return node_replicas

        def partitions_rebalanced():
            per_node = _replicas_per_node()
            self.redpanda.logger.info(f"replicas per node: {per_node}")
            if len(per_node) < len(self.redpanda.started_nodes()):
                return False

            replicas = sum(per_node.values())
            if replicas != total_replicas:
                return False

            return all(count > 1 for count in per_node.values())

        wait_until(partitions_rebalanced, timeout_sec=30, backoff_sec=1)
        # add third node
        self.redpanda.start_node(self.redpanda.nodes[2])
        wait_until(partitions_rebalanced, timeout_sec=30, backoff_sec=1)

        self.run_validation(enable_idempotence=False, consumer_timeout_sec=45)
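
The counting loop in `_replicas_per_node` can be written more compactly with `collections.Counter` without changing behavior; a sketch (logging omitted):

from collections import Counter

def _replicas_per_node():
    md = kafkacat.metadata()
    return Counter(r['id']
                   for topic in md['topics']
                   for p in topic['partitions']
                   for r in p['replicas'])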
Example #4
    def test_recovery_after_multiple_restarts(self):
        self.start_redpanda(3, extra_rp_conf=self._extra_rp_conf)
        spec = TopicSpec(partition_count=60, replication_factor=3)

        DefaultClient(self.redpanda).create_topic(spec)
        self.topic = spec.name

        rpk = RpkTool(self.redpanda)
        rpk.alter_topic_config(spec.name, 'redpanda.remote.write', 'true')
        rpk.alter_topic_config(spec.name, 'redpanda.remote.read', 'true')

        self.start_producer(1, throughput=100)
        self.start_consumer(1)
        self.await_startup()

        def no_under_replicated_partitions():
            metric_sample = self.redpanda.metrics_sample("under_replicated")
            for s in metric_sample.samples:
                if s.value > 0:
                    return False
            return True

        # restart all the nodes and wait for recovery
        for i in range(0, 10):
            for n in self.redpanda.nodes:
                self.redpanda.signal_redpanda(n)
                self.redpanda.start_node(n)
            wait_until(no_under_replicated_partitions, 30, 2)

        self.run_validation(enable_idempotence=False,
                            producer_timeout_sec=60,
                            consumer_timeout_sec=180)
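
The `no_under_replicated_partitions` predicate is a recurring pattern in these tests; a small generalization that waits for any sampled metric to drain to zero, using the same `metrics_sample` and `wait_until` calls (the helper name is mine):

def wait_for_metric_zero(redpanda, metric_name, timeout_sec=30):
    # poll the metric until no sample reports a positive value
    def _drained():
        sample = redpanda.metrics_sample(metric_name)
        return not any(s.value > 0 for s in sample.samples)

    wait_until(_drained, timeout_sec, 2)

Usage: wait_for_metric_zero(self.redpanda, "under_replicated").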
Example #5
 def delete_topic(name):
     try:
         DefaultClient(self.redpanda).delete_topic(name)
     except Exception as e:
         self.redpanda.logger.warn(f"error deleting topic {name} - {e}")
     try:
         return not is_topic_present(name)
     except Exception as e:
         self.redpanda.logger.warn(f"error while listing topics - {e}")
         return False
Example #6
 def create_topic(spec):
     try:
         DefaultClient(self.redpanda).create_topic(spec)
     except Exception as e:
         self.redpanda.logger.warn(
             f"error creating topic {spec.name} - {e}")
     try:
         return is_topic_present(spec.name)
     except Exception as e:
         self.redpanda.logger.warn(f"error while listing topics - {e}")
         return False
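
Both helpers deliberately swallow errors and return a boolean, which makes them easy to drive with `wait_until` for retried, eventually-consistent topic operations; a usage sketch:

# keep retrying deletion until the topic is really gone
wait_until(lambda: delete_topic(spec.name), timeout_sec=30, backoff_sec=2)

# keep retrying creation until the topic is listed
wait_until(lambda: create_topic(spec), timeout_sec=30, backoff_sec=2)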
Example #7
 def start_redpanda(self, num_nodes=1, extra_rp_conf=None):
     if extra_rp_conf is not None:
         # merge both configurations; the extra_rp_conf passed in as a
         # parameter takes precedence
         self._extra_rp_conf = {**self._extra_rp_conf, **extra_rp_conf}
     assert self.redpanda is None
     self.redpanda = RedpandaService(self.test_context,
                                     num_nodes,
                                     extra_rp_conf=self._extra_rp_conf)
     self.redpanda.start()
     self._client = DefaultClient(self.redpanda)
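
The `{**a, **b}` merge gives the right-hand mapping precedence, which is exactly why the per-test `extra_rp_conf` overrides the class defaults; a self-contained illustration:

base = {"group_topic_partitions": 1, "default_topic_replications": 3}
extra = {"group_topic_partitions": 16}

merged = {**base, **extra}
assert merged == {"group_topic_partitions": 16,
                  "default_topic_replications": 3}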
Example #8
    def start_brokers(self, source_type=kafka_source):
        if source_type == TestMirrorMakerService.redpanda_source:
            self.source_broker = RedpandaService(self.test_context,
                                                 num_brokers=3)
        else:
            self.source_broker = KafkaServiceAdapter(
                self.test_context,
                KafkaService(self.test_context,
                             num_nodes=3,
                             zk=self.zk,
                             version=V_3_0_0))

        self.redpanda = RedpandaService(self.test_context, num_brokers=3)
        self.source_broker.start()
        self.redpanda.start()

        self.source_client = DefaultClient(self.source_broker)

        self.topic.partition_count = 1000 if self.redpanda.dedicated_nodes else 1
        self.source_client.create_topic(self.topic)
Example #9
 def __init__(self,
              test_context,
              num_brokers=3,
              extra_rp_conf=dict(),
              enable_pp=False,
              enable_sr=False,
              num_cores=3):
     super(RedpandaTest, self).__init__(test_context)
     self.scale = Scale(test_context)
     self.redpanda = RedpandaService(test_context,
                                     num_brokers,
                                     extra_rp_conf=extra_rp_conf,
                                     enable_pp=enable_pp,
                                     enable_sr=enable_sr,
                                     num_cores=num_cores)
     self._client = DefaultClient(self.redpanda)
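
A concrete test would subclass `RedpandaTest` and use the `self.redpanda` and `self._client` members initialized here; a minimal sketch (the subclass and test body are illustrative, and cluster startup is assumed to happen in the framework's setUp, which is not shown in this listing):

class SmokeTest(RedpandaTest):
    def __init__(self, test_context):
        super(SmokeTest, self).__init__(
            test_context,
            num_brokers=3,
            extra_rp_conf={"default_topic_replications": 3})

    def test_create_topic(self):
        # client built in the base __init__
        spec = TopicSpec(partition_count=3, replication_factor=3)
        self._client.create_topic(spec)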
Example #10
    def _create_random_topics(self, count):
        max_partitions = 10

        topics = []
        for i in range(0, count):
            name = f"topic-{i}"
            spec = TopicSpec(
                name=name,
                partition_count=random.randint(1, max_partitions),
                replication_factor=random.choice(ALLOWED_REPLICATION))

            topics.append(spec)

        for spec in topics:
            DefaultClient(self.redpanda).create_topic(spec)

        return topics
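
`ALLOWED_REPLICATION` is defined outside this snippet; a plausible definition and call site, both of which are assumptions:

# assumed module-level constant: replication factors the test may pick
ALLOWED_REPLICATION = [1, 3]

# inside a test method:
topics = self._create_random_topics(count=20)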
Example #11
    def test_recovery_after_multiple_restarts(self):
        # If a debug build has to do a restart across a significant
        # number of partitions, it gets slow.  Use fewer partitions
        # on debug builds.
        partition_count = 10 if self.debug_mode else 60

        si_settings = SISettings(cloud_storage_reconciliation_interval_ms=500,
                                 cloud_storage_max_connections=5,
                                 log_segment_size=self.log_segment_size)
        self.s3_bucket_name = si_settings.cloud_storage_bucket

        self.start_redpanda(3,
                            extra_rp_conf=self._extra_rp_conf,
                            si_settings=si_settings)
        spec = TopicSpec(partition_count=partition_count, replication_factor=3)

        DefaultClient(self.redpanda).create_topic(spec)
        self.topic = spec.name

        rpk = RpkTool(self.redpanda)
        rpk.alter_topic_config(spec.name, 'redpanda.remote.write', 'true')
        rpk.alter_topic_config(spec.name, 'redpanda.remote.read', 'true')

        self.start_producer(1, throughput=100)
        self.start_consumer(1)
        self.await_startup()

        def no_under_replicated_partitions():
            metric_sample = self.redpanda.metrics_sample("under_replicated")
            for s in metric_sample.samples:
                if s.value > 0:
                    return False
            return True

        # restart all the nodes and wait for recovery
        for i in range(0, 10):
            for n in self.redpanda.nodes:
                self.redpanda.signal_redpanda(n)
                self.redpanda.start_node(n)
            wait_until(no_under_replicated_partitions, 30, 2)

        self.run_validation(enable_idempotence=False,
                            producer_timeout_sec=60,
                            consumer_timeout_sec=180)
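
The restart loop here is identical to the one in Example #4; it could be factored into a helper built from the same `signal_redpanda`, `start_node`, and `wait_until` calls (a sketch):

def restart_all_nodes(redpanda, recovered, rounds=10):
    # bounce every node each round, then wait for the recovery predicate
    for _ in range(rounds):
        for n in redpanda.nodes:
            redpanda.signal_redpanda(n)
            redpanda.start_node(n)
        wait_until(recovered, 30, 2)

Usage: restart_all_nodes(self.redpanda, no_under_replicated_partitions).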
Example #12
    def __init__(self,
                 test_context,
                 num_brokers=None,
                 extra_rp_conf=dict(),
                 enable_pp=False,
                 enable_sr=False,
                 si_settings=None,
                 **kwargs):
        """
        Any trailing keyword arguments are passed through to the
        RedpandaService constructor.
        """
        super(RedpandaTest, self).__init__(test_context)
        self.scale = Scale(test_context)
        self.si_settings = si_settings

        if num_brokers is None:
            # Default to a 3 node cluster if sufficient nodes are available, else
            # a single node cluster.  This is just a default: tests are welcome
            # to override constructor to pass an explicit size.  This logic makes
            # it convenient to mix 3 node and 1 node cases in the same class, by
            # just modifying the @cluster node count per test.
            if test_context.cluster.available().size() >= 3:
                num_brokers = 3
            else:
                num_brokers = 1

        if self.si_settings:
            self.si_settings.load_context(self.logger, test_context)

        self.redpanda = RedpandaService(test_context,
                                        num_brokers,
                                        extra_rp_conf=extra_rp_conf,
                                        enable_pp=enable_pp,
                                        enable_sr=enable_sr,
                                        si_settings=self.si_settings,
                                        **kwargs)
        self._client = DefaultClient(self.redpanda)
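
The `**kwargs` passthrough is what lets subclasses forward arbitrary `RedpandaService` options without `RedpandaTest` naming them; for example, the `environment` keyword that Example #14 passes to `RedpandaService` directly could travel through a subclass like this (a sketch):

class UpgradeTest(RedpandaTest):
    def __init__(self, test_context):
        super(UpgradeTest, self).__init__(
            test_context,
            num_brokers=5,
            extra_rp_conf={"group_topic_partitions": 16},
            # forwarded to RedpandaService via **kwargs
            environment={"__REDPANDA_LOGICAL_VERSION": 1})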
Example #13
    def test_recovery_after_catastrophic_failure(self):

        self.redpanda = RedpandaService(
            self.test_context,
            3,
            extra_rp_conf={
                "enable_auto_rebalance_on_node_add": True,
                "group_topic_partitions": 1,
                "default_topic_replications": 3,
            })

        self.redpanda.start()
        spec = TopicSpec(name="test-topic",
                         partition_count=6,
                         replication_factor=3)

        DefaultClient(self.redpanda).create_topic(spec)
        self.topic = spec.name

        self.start_producer(1, throughput=10000)
        self.start_consumer(1)
        self.await_startup()

        # inject a permanent failure of a random type; nodes[0:1] contains
        # only the first node, so this always targets node 0
        f_spec = FailureSpec(random.choice(FailureSpec.FAILURE_TYPES),
                             random.choice(self.redpanda.nodes[0:1]))

        self.inject_failure(f_spec)

        # inject transient failure on other node
        f_spec = FailureSpec(random.choice(FailureSpec.FAILURE_TYPES),
                             self.redpanda.nodes[2],
                             length=2.0 if self.scale.local else 15.0)

        self.inject_failure(f_spec)

        self.validate_records()
Example #14
    def test_cluster_is_available_during_upgrade_without_group_topic(self):
        '''
        Validates that the cluster remains available and healthy during an
        upgrade when the `kafka_internal::group` topic is not present.
        '''

        # set redpanda logical version to value without __consumer_offsets support
        self.redpanda = RedpandaService(
            self.test_context,
            5,
            extra_rp_conf={
                "group_topic_partitions": 16,
                "default_topic_replications": 3,
            },
            environment={"__REDPANDA_LOGICAL_VERSION": 1})

        self.redpanda.start()
        self._client = DefaultClient(self.redpanda)

        spec = TopicSpec(partition_count=6, replication_factor=3)
        self.client().create_topic(spec)
        self.topic = spec.name

        def cluster_is_stable():
            admin = Admin(self.redpanda)
            brokers = admin.get_brokers()
            if len(brokers) < 3:
                return False

            for b in brokers:
                self.logger.debug(f"broker:  {b}")
                if not (b['is_alive'] and 'disk_space' in b):
                    return False

            return True

        def node_stopped(node_id):
            admin = Admin(self.redpanda)
            brokers = admin.get_brokers()

            for b in brokers:
                self.logger.debug(f"broker:  {b}")
                if b['node_id'] == node_id:
                    return not b['is_alive']

            return False

        kcl = KCL(self.redpanda)

        # check that consumer offsets topic is not present
        topics = set(kcl.list_topics())

        assert "__consumer_offsets" not in topics

        # enable consumer offsets support
        self.redpanda.set_environment({"__REDPANDA_LOGICAL_VERSION": 2})

        def get_raft0_follower():
            ctrl = self.redpanda.controller
            node = random.choice(self.redpanda.nodes)
            while self.redpanda.idx(node) == self.redpanda.idx(ctrl):
                node = random.choice(self.redpanda.nodes)

            return node

        # restart node that is not controller
        n = get_raft0_follower()
        self.logger.info(f"restarting node {n.account.hostname}")
        self.redpanda.stop_node(n, timeout=60)
        # wait until the stopped node is reported as not alive
        wait_until(lambda: node_stopped(self.redpanda.idx(n)),
                   90,
                   backoff_sec=2)
        self.redpanda.start_node(n)
        wait_until(cluster_is_stable, 90, backoff_sec=2)
Example #15
    def test_migrating_consume_offsets(self, failures, cpus):
        '''
        Validates correctness while executing consumer offsets migration
        '''

        # set redpanda logical version to value without __consumer_offsets support
        self.redpanda = RedpandaService(
            self.test_context,
            5,
            resource_settings=ResourceSettings(num_cpus=cpus),
            extra_rp_conf={
                "group_topic_partitions": 16,
                "default_topic_replications": 3,
            },
            environment={"__REDPANDA_LOGICAL_VERSION": 1})

        self.redpanda.start()
        self._client = DefaultClient(self.redpanda)
        # set of failure suppressed nodes - required to make restarts deterministic
        suppressed = set()

        def failure_injector_loop():
            f_injector = FailureInjector(self.redpanda)
            while failures:
                f_type = random.choice(FailureSpec.FAILURE_TYPES)
                length = 0
                node = random.choice(self.redpanda.nodes)
                while self.redpanda.idx(node) in suppressed:
                    node = random.choice(self.redpanda.nodes)

                # allow suspending any node
                if f_type == FailureSpec.FAILURE_SUSPEND:
                    length = random.randint(
                        1,
                        ConsumerOffsetsMigrationTest.max_suspend_duration_sec)

                f_injector.inject_failure(
                    FailureSpec(node=node, type=f_type, length=length))

                delay = random.randint(
                    ConsumerOffsetsMigrationTest.min_inter_failure_time_sec,
                    ConsumerOffsetsMigrationTest.max_inter_failure_time_sec)
                self.redpanda.logger.info(
                    f"waiting {delay} seconds before next failure")
                time.sleep(delay)

        if failures:
            finjector_thread = threading.Thread(target=failure_injector_loop,
                                                args=())
            finjector_thread.daemon = True
            finjector_thread.start()
        spec = TopicSpec(partition_count=6, replication_factor=3)
        self.client().create_topic(spec)
        self.topic = spec.name

        self.start_producer(1, throughput=5000)
        self.start_consumer(1)
        self.await_startup()

        def cluster_is_stable():
            admin = Admin(self.redpanda)
            brokers = admin.get_brokers()
            if len(brokers) < 3:
                return False

            for b in brokers:
                self.logger.debug(f"broker:  {b}")
                if not (b['is_alive'] and 'disk_space' in b):
                    return False

            return True

        kcl = KCL(self.redpanda)

        def _group_present():
            return len(kcl.list_groups().splitlines()) > 1

        # make sure that group is there
        wait_until(_group_present, 10, 1)

        # check that consumer offsets topic is not present
        topics = set(kcl.list_topics())

        assert "__consumer_offsets" not in topics

        # enable consumer offsets support
        self.redpanda.set_environment({"__REDPANDA_LOGICAL_VERSION": 2})
        for n in self.redpanda.nodes:
            id = self.redpanda.idx(n)
            suppressed.add(id)
            self.redpanda.restart_nodes(n, stop_timeout=60)
            suppressed.remove(id)
            # wait for the cluster to stabilize after the restart
            wait_until(cluster_is_stable, 90, backoff_sec=2)

        def _consumer_offsets_present():
            try:
                partitions = list(
                    self.client().describe_topic("__consumer_offsets"))
                return len(partitions) > 0
            except Exception:
                return False

        wait_until(_consumer_offsets_present, timeout_sec=90, backoff_sec=3)

        self.run_validation(min_records=100000,
                            producer_timeout_sec=300,
                            consumer_timeout_sec=180)
Example #16
    def test_node_recovery(self, recovery_type):
        self.start_redpanda(num_nodes=3)
        kafka_tools = KafkaCliTools(self.redpanda)
        kafka_cat = KafkaCat(self.redpanda)
        # create topics
        topics = []
        for _ in range(0, 6):
            topics.append(TopicSpec(partition_count=random.randint(1, 10)))
        DefaultClient(self.redpanda).create_topic(topics)
        # choose one topic to run the main workload
        self.topic = random.choice(topics).name

        self.start_producer(1)
        self.start_consumer(2)
        self.await_startup()

        # choose another topic to prepopulate with data
        prepopulated_topic = random.choice(topics)

        while self.topic == prepopulated_topic.name:
            prepopulated_topic = random.choice(topics)

        # populate topic with data
        kafka_tools.produce(prepopulated_topic.name, 20000, 1024)

        def list_offsets():
            offsets = {}
            for p in range(0, prepopulated_topic.partition_count):
                offsets[p] = kafka_cat.list_offsets(prepopulated_topic.name, p)
            return offsets

        # store offsets
        offsets = list_offsets()

        self.redpanda.logger.info(f"Topic offsets: {offsets}")
        # stop one of the nodes and remove its data
        stopped = random.choice(self.redpanda.nodes)
        # prepare seed servers list
        seeds = map(lambda n: {
            "address": n.account.hostname,
            "port": 33145
        }, self.redpanda.nodes)
        seeds = list(
            filter(lambda n: n['address'] != stopped.account.hostname, seeds))

        self.redpanda.stop_node(stopped)
        if recovery_type == FullNodeRecoveryTest.FULL_RECOVERY:
            self.redpanda.clean_node(stopped, preserve_logs=True)

        # produce some more data to make sure that stopped node is behind
        kafka_tools.produce(prepopulated_topic.name, 20000, 1024)

        # start the node with the same node id and a non-empty seed server
        # list; give the node more time to start as it has to recover
        self.redpanda.start_node(stopped,
                                 override_cfg_params={'seed_servers': seeds},
                                 timeout=90)

        def all_topics_recovered():
            metric = self.redpanda.metrics_sample("under_replicated_replicas",
                                                  self.redpanda.nodes)
            under_replicated = filter(lambda s: s.value == 1, metric.samples)
            under_replicated = list(
                map(
                    lambda s: (s.labels['namespace'], s.labels['topic'], s.
                               labels['partition']), under_replicated))
            self.redpanda.logger.info(
                f"under replicated partitions: {list(under_replicated)}")
            return len(under_replicated) == 0

        # wait for prepopulated topic to recover
        wait_until(all_topics_recovered, 60, 1)

        self.run_validation(min_records=20000,
                            enable_idempotence=False,
                            producer_timeout_sec=60,
                            consumer_timeout_sec=180)

        # validate prepopulated topic offsets
        assert offsets == list_offsets()
Example #17
class TestMirrorMakerService(EndToEndTest):
    kafka_source = "kafka"
    redpanda_source = "redpanda"

    def __init__(self, test_context):
        super(TestMirrorMakerService, self).__init__(test_context)

        self.topic = TopicSpec(replication_factor=3)
        # create single zookeeper node for Kafka
        self.zk = ZookeeperService(self.test_context,
                                   num_nodes=1,
                                   version=V_3_0_0)
        self.source_broker = None

    def setUp(self):
        self.zk.start()

    def tearDown(self):
        # ducktape handles service teardown automatically, but it is hard
        # to tell what went wrong if one of the services hangs.  Do it
        # explicitly here with some logging, to enable debugging issues
        # like https://github.com/redpanda-data/redpanda/issues/4270

        if self.source_broker is not None:
            self.logger.info(
                f"Stopping source broker ({self.source_broker.__class__.__name__})..."
            )
            self.source_broker.stop()
            self.logger.info(
                f"Awaiting source broker ({self.source_broker.__class__.__name__})..."
            )

        self.logger.info("Stopping zookeeper...")
        self.zk.stop()
        self.logger.info("Awaiting zookeeper...")

    def start_brokers(self, source_type=kafka_source):
        if source_type == TestMirrorMakerService.redpanda_source:
            self.source_broker = RedpandaService(self.test_context,
                                                 num_brokers=3)
        else:
            self.source_broker = KafkaServiceAdapter(
                self.test_context,
                KafkaService(self.test_context,
                             num_nodes=3,
                             zk=self.zk,
                             version=V_3_0_0))

        self.redpanda = RedpandaService(self.test_context, num_brokers=3)
        self.source_broker.start()
        self.redpanda.start()

        self.source_client = DefaultClient(self.source_broker)

        self.topic.partition_count = 1000 if self.redpanda.dedicated_nodes else 1
        self.source_client.create_topic(self.topic)

    def start_workload(self):

        self.consumer = VerifiableConsumer(
            self.test_context,
            num_nodes=1,
            redpanda=self.redpanda,
            topic=self.topic.name,
            group_id='consumer_test_group',
            on_record_consumed=self.on_record_consumed)
        self.consumer.start()

        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=1,
            redpanda=self.source_broker,
            topic=self.topic.name,
            throughput=1000,
            message_validator=is_int_with_prefix)
        self.producer.start()

    def wait_for_n_messages(self, n_messages=100):
        """Wait for a minimum number of messages to be successfully produced."""
        wait_until(
            lambda: self.producer.num_acked > n_messages,
            timeout_sec=10,
            err_msg=
            "Producer failed to produce %d messages in a reasonable amount of time."
            % n_messages)

    @cluster(num_nodes=10)
    @parametrize(source_type=kafka_source)
    @parametrize(source_type=redpanda_source)
    def test_simple_end_to_end(self, source_type):
        # start brokers
        self.start_brokers(source_type=source_type)
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda)
        topics = []
        for i in range(0, 10):
            topics.append(
                TopicSpec(partition_count=random.randint(1, 10),
                          retention_bytes=random.randint(100000000, 300000000),
                          retention_ms=random.randint(1 * 3600000,
                                                      10 * 3600000)))
        self.source_client.create_topic(topics)
        self.mirror_maker.start()
        # start source producer & target consumer
        self.start_workload()

        self.run_validation(consumer_timeout_sec=120)
        self.mirror_maker.stop()
        target_client = DefaultClient(self.redpanda)
        for t in topics:
            desc = target_client.describe_topic(t.name)
            self.logger.debug(f'source topic: {t}, target topic: {desc}')
            assert len(desc.partitions) == t.partition_count

    @cluster(num_nodes=9)
    @parametrize(source_type=kafka_source)
    @parametrize(source_type=redpanda_source)
    def test_consumer_group_mirroring(self, source_type):
        # start redpanda
        self.start_brokers(source_type=source_type)
        consumer_group = "test-group-1"
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda,
                                         consumer_group_pattern=consumer_group,
                                         log_level="TRACE")
        self.mirror_maker.start()

        msg_size = 512
        msg_cnt = 1000000 if self.redpanda.dedicated_nodes else 100

        # produce some messages to source redpanda
        producer = RpkProducer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               msg_size,
                               msg_cnt,
                               acks=-1)

        producer.start()
        producer.wait()
        producer.free()

        # consume some messages from source redpanda
        consumer = RpkConsumer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               ignore_errors=False,
                               retries=3,
                               group=consumer_group,
                               save_msgs=False,
                               num_msgs=int(msg_cnt / 5))

        consumer.start()
        consumer.wait()
        consumer.stop()
        source_messages = consumer.messages
        self.logger.info(f"source message count: {len(source_messages)}")
        consumer.free()

        src_rpk = RpkTool(self.source_broker)
        source_group = src_rpk.group_describe(consumer_group)
        target_rpk = RpkTool(self.redpanda)

        def target_group_equal():
            try:
                target_group = target_rpk.group_describe(consumer_group)
            except RpkException as e:
                # e.g. COORDINATOR_NOT_AVAILABLE
                self.logger.info(f"Error describing target cluster group: {e}")
                return False

            self.logger.info(
                f"source {source_group}, target_group: {target_group}")
            return target_group.partitions == source_group.partitions and target_group.name == source_group.name

        # wait for consumer group sync
        timeout = 600 if self.redpanda.dedicated_nodes else 60
        wait_until(target_group_equal, timeout_sec=timeout, backoff_sec=5)

        self.mirror_maker.stop()