Example #1
0
 def start_producer(self, num_nodes=1, throughput=1000):
     """
     Create a VerifiableProducer, store it on ``self.producer`` and start it.

     ``self.redpanda`` and ``self.topic`` must already be set (truthy).

     :param num_nodes: passed to VerifiableProducer as ``num_nodes``
     :param throughput: passed to VerifiableProducer as ``throughput``
     """
     assert self.redpanda
     assert self.topic
     producer_kwargs = dict(num_nodes=num_nodes,
                            redpanda=self.redpanda,
                            topic=self.topic,
                            throughput=throughput)
     self.producer = VerifiableProducer(self.test_context, **producer_kwargs)
     self.producer.start()
Example #2
0
 def start_producer(self, num_nodes=1, throughput=1000):
     """
     Build and start the background producer for ``self.topic``.

     Both ``self.redpanda`` and ``self.topic`` have to be set (truthy)
     before this is called. The producer is constructed with
     ``is_int_with_prefix`` as its message validator and is kept on
     ``self.producer``.

     :param num_nodes: passed to VerifiableProducer as ``num_nodes``
     :param throughput: passed to VerifiableProducer as ``throughput``
     """
     assert self.redpanda
     assert self.topic
     producer = VerifiableProducer(self.test_context,
                                   num_nodes=num_nodes,
                                   redpanda=self.redpanda,
                                   topic=self.topic,
                                   throughput=throughput,
                                   message_validator=is_int_with_prefix)
     self.producer = producer
     producer.start()
Example #3
0
    def start_workload(self):
        """
        Start the consumer and then the producer for ``self.topic.name``.

        The consumer is attached to ``self.redpanda`` and reports every
        record to ``self.on_record_consumed``; the producer is attached to
        ``self.source_broker``. Both use a single node.
        """
        topic_name = self.topic.name

        # Consumer side: started first, before anything is produced.
        consumer = VerifiableConsumer(self.test_context,
                                      num_nodes=1,
                                      redpanda=self.redpanda,
                                      topic=topic_name,
                                      group_id='consumer_test_group',
                                      on_record_consumed=self.on_record_consumed)
        self.consumer = consumer
        consumer.start()

        # Producer side: writes to the source broker.
        producer = VerifiableProducer(self.test_context,
                                      num_nodes=1,
                                      redpanda=self.source_broker,
                                      topic=topic_name,
                                      throughput=1000,
                                      message_validator=is_int_with_prefix)
        self.producer = producer
        producer.start()
Example #4
0
class EndToEndTest(Test):
    """
    Test for common pattern:
      - Produce and consume in the background
      - Perform some action (e.g. partition movement)
      - Run validation
    """
    def __init__(self, test_context, extra_rp_conf=None):
        """
        :param test_context: test context, forwarded to the base class
        :param extra_rp_conf: optional dict of extra configuration that is
            handed to RedpandaService when start_redpanda() is called
        """
        super().__init__(test_context=test_context)
        self._extra_rp_conf = {} if extra_rp_conf is None else extra_rp_conf
        # Values of every record seen by on_record_consumed, in arrival order
        self.records_consumed = []
        # TopicPartition -> offset of the most recent record seen
        self.last_consumed_offsets = {}
        self.redpanda = None
        self.topic = None
        self._client = None

    def start_redpanda(self, num_nodes=1, extra_rp_conf=None):
        """
        Create and start the RedpandaService and a DefaultClient for it.

        Must be called at most once per test (asserts that ``self.redpanda``
        is still None).

        :param num_nodes: passed positionally to RedpandaService
        :param extra_rp_conf: optional dict merged over the configuration
            given to the constructor
        """
        if extra_rp_conf is not None:
            # merge both configurations, the extra_rp_conf passed in
            # parameter takes the precedence
            self._extra_rp_conf = {**self._extra_rp_conf, **extra_rp_conf}
        assert self.redpanda is None
        self.redpanda = RedpandaService(self.test_context,
                                        num_nodes,
                                        extra_rp_conf=self._extra_rp_conf)
        self.redpanda.start()
        self._client = DefaultClient(self.redpanda)

    def client(self):
        """Return the client created by start_redpanda(); asserts it exists."""
        assert self._client is not None
        return self._client

    def start_consumer(self, num_nodes=1, group_id="test_group"):
        """
        Create a VerifiableConsumer on ``self.topic``, store it on
        ``self.consumer`` and start it.

        Requires ``self.redpanda`` and ``self.topic`` to be set. Consumed
        records are reported to on_record_consumed().
        """
        assert self.redpanda
        assert self.topic
        self.consumer = VerifiableConsumer(
            self.test_context,
            num_nodes=num_nodes,
            redpanda=self.redpanda,
            topic=self.topic,
            group_id=group_id,
            on_record_consumed=self.on_record_consumed)
        self.consumer.start()

    def start_producer(self, num_nodes=1, throughput=1000):
        """
        Create a VerifiableProducer on ``self.topic``, store it on
        ``self.producer`` and start it.

        Requires ``self.redpanda`` and ``self.topic`` to be set.
        """
        assert self.redpanda
        assert self.topic
        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=num_nodes,
            redpanda=self.redpanda,
            topic=self.topic,
            throughput=throughput,
            message_validator=is_int_with_prefix)
        self.producer.start()

    def on_record_consumed(self, record, node):
        """
        Consumer callback: remember the record's value and its offset.

        :param record: mapping with "topic", "partition", "value" and
            "offset" keys
        :param node: unused here
        """
        partition = TopicPartition(record["topic"], record["partition"])
        self.last_consumed_offsets[partition] = record["offset"]
        self.records_consumed.append(record["value"])

    def await_consumed_offsets(self, last_acked_offsets, timeout_sec):
        """
        Block until, for every partition in ``last_acked_offsets``, a record
        has been consumed and the consumer's last commit is greater than the
        acked offset.

        :param last_acked_offsets: dict of partition -> last acked offset
        :param timeout_sec: timeout handed to wait_until
        """
        def has_finished_consuming():
            for partition, offset in last_acked_offsets.items():
                if partition not in self.last_consumed_offsets:
                    return False
                last_commit = self.consumer.last_commit(partition)
                # NOTE(review): presumably last_commit() yields None until
                # something has been committed - confirm. A commit of 0 is
                # covered by the comparison itself.
                if last_commit is None or last_commit <= offset:
                    self.logger.debug(
                        f"waiting for partition {partition} offset {offset} to be committed, last committed offset: {last_commit}"
                    )
                    return False
            return True

        wait_until(has_finished_consuming,
                   timeout_sec=timeout_sec,
                   err_msg="Consumer failed to consume up to offsets %s after waiting %ds." %\
                   (str(last_acked_offsets), timeout_sec))

    def _collect_all_logs(self):
        """Mark every service of the test context for log collection."""
        for s in self.test_context.services:
            self.mark_for_collect(s)

    def await_startup(self, min_records=5, timeout_sec=30):
        """
        Block until the consumer reports at least ``min_records`` consumed
        records. On any failure all service logs are marked for collection
        and the exception is re-raised.
        """
        try:
            wait_until(lambda: self.consumer.total_consumed() >= min_records,
                       timeout_sec=timeout_sec,
                       err_msg="Timed out after %ds while awaiting initial record delivery of %d records" %\
                       (timeout_sec, min_records))
        except BaseException:
            # BaseException on purpose: logs are collected for any kind of
            # abort and the exception is always propagated unchanged.
            self._collect_all_logs()
            raise

    def run_validation(self,
                       min_records=5000,
                       producer_timeout_sec=30,
                       consumer_timeout_sec=30,
                       enable_idempotence=False):
        """
        Wait for the producer to ack more than ``min_records`` records, stop
        it, wait for the consumer to catch up, stop it, then validate().

        On any failure all service logs are marked for collection and the
        exception is re-raised.

        :param min_records: number of acked records that must be exceeded
        :param producer_timeout_sec: timeout for the producer wait
        :param consumer_timeout_sec: timeout for the consumer catch-up wait
        :param enable_idempotence: forwarded to validate()
        """
        try:
            wait_until(lambda: self.producer.num_acked > min_records,
                       timeout_sec=producer_timeout_sec,
                       err_msg="Producer failed to produce messages for %ds." %\
                       producer_timeout_sec)

            self.logger.info("Stopping producer after writing up to offsets %s" %\
                         str(self.producer.last_acked_offsets))
            self.producer.stop()

            self.await_consumed_offsets(self.producer.last_acked_offsets,
                                        consumer_timeout_sec)
            self.consumer.stop()

            self.validate(enable_idempotence)
        except BaseException:
            # BaseException on purpose: logs are collected for any kind of
            # abort and the exception is always propagated unchanged.
            self._collect_all_logs()
            raise

    def validate(self, enable_idempotence):
        """
        Assert that every acked record was consumed.

        Duplicated consumed records fail the validation only when
        ``enable_idempotence`` is true; otherwise they are just mentioned in
        the message. Service logs are marked for collection on failure.

        :raises AssertionError: when validation fails
        """
        acked = self.producer.acked
        consumed = self.records_consumed

        self.logger.info("Number of acked records: %d" % len(acked))
        self.logger.info("Number of consumed records: %d" % len(consumed))

        success = True
        msg = ""

        # Built once and reused for both the missing and the duplicate check
        consumed_unique = set(consumed)

        # Correctness of the set difference operation depends on using equivalent
        # message_validators in producer and consumer
        missing = set(acked) - consumed_unique

        if missing:
            success = False
            msg = annotate_missing_msgs(missing, acked, consumed, msg)

        # Are there duplicates? (a set is never larger than its source list)
        num_duplicates = len(consumed) - len(consumed_unique)
        if num_duplicates > 0:
            if enable_idempotence:
                success = False
                msg += "Detected %d duplicates even though idempotence was enabled.\n" % num_duplicates
            else:
                msg += "(There are also %d duplicate messages in the log - but that is an acceptable outcome)\n" % num_duplicates

        # Collect all logs if validation fails
        if not success:
            self._collect_all_logs()

        assert success, msg
Example #5
0
class TestMirrorMakerService(EndToEndTest):
    """
    Tests running MirrorMaker2 from a source cluster (Kafka or Redpanda,
    selected by ``source_type``) to a target Redpanda cluster.
    """
    kafka_source = "kafka"
    redpanda_source = "redpanda"

    def __init__(self, test_context):
        super().__init__(test_context)

        self.topic = TopicSpec(replication_factor=3)
        # create single zookeeper node for Kafka
        self.zk = ZookeeperService(self.test_context,
                                   num_nodes=1,
                                   version=V_3_0_0)
        # Set by start_brokers(); stays None until then
        self.source_broker = None

    def setUp(self):
        """Start zookeeper before each test."""
        self.zk.start()

    def tearDown(self):
        """Stop the source broker (if one was created) and zookeeper."""
        # ducktape handle service teardown automatically, but it is hard
        # to tell what went wrong if one of the services hangs.  Do it
        # explicitly here with some logging, to enable debugging issues
        # like https://github.com/redpanda-data/redpanda/issues/4270

        if self.source_broker is not None:
            self.logger.info(
                f"Stopping source broker ({self.source_broker.__class__.__name__})..."
            )
            self.source_broker.stop()
            self.logger.info(
                f"Awaiting source broker ({self.source_broker.__class__.__name__})..."
            )

        self.logger.info("Stopping zookeeper...")
        self.zk.stop()
        self.logger.info("Awaiting zookeeper...")

    def start_brokers(self, source_type=kafka_source):
        """
        Create and start the source and target clusters, then create
        ``self.topic`` on the source.

        :param source_type: ``redpanda_source`` selects a Redpanda source
            cluster; any other value selects Kafka
        """
        if source_type == TestMirrorMakerService.redpanda_source:
            self.source_broker = RedpandaService(self.test_context,
                                                 num_brokers=3)
        else:
            self.source_broker = KafkaServiceAdapter(
                self.test_context,
                KafkaService(self.test_context,
                             num_nodes=3,
                             zk=self.zk,
                             version=V_3_0_0))

        # Target cluster is always Redpanda
        self.redpanda = RedpandaService(self.test_context, num_brokers=3)
        self.source_broker.start()
        self.redpanda.start()

        self.source_client = DefaultClient(self.source_broker)

        self.topic.partition_count = 1000 if self.redpanda.dedicated_nodes else 1
        self.source_client.create_topic(self.topic)

    def start_workload(self):
        """
        Start a consumer on the target cluster and a producer on the source
        cluster, both on ``self.topic.name``.
        """
        self.consumer = VerifiableConsumer(
            self.test_context,
            num_nodes=1,
            redpanda=self.redpanda,
            topic=self.topic.name,
            group_id='consumer_test_group',
            on_record_consumed=self.on_record_consumed)
        self.consumer.start()

        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=1,
            redpanda=self.source_broker,
            topic=self.topic.name,
            throughput=1000,
            message_validator=is_int_with_prefix)
        self.producer.start()

    def wait_for_n_messages(self, n_messages=100):
        """Wait for a minimum number of messages to be successfully produced."""
        wait_until(
            lambda: self.producer.num_acked > n_messages,
            timeout_sec=10,
            err_msg=
            "Producer failed to produce %d messages in a reasonable amount of time."
            % n_messages)

    @cluster(num_nodes=10)
    @parametrize(source_type=kafka_source)
    @parametrize(source_type=redpanda_source)
    def test_simple_end_to_end(self, source_type):
        """
        Mirror a produced workload and ten randomly configured topics, then
        check that each topic on the target has the source partition count.
        """
        # start brokers
        self.start_brokers(source_type=source_type)
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda)
        # ten topics with randomized partition counts and retention settings
        topics = [
            TopicSpec(partition_count=random.randint(1, 10),
                      retention_bytes=random.randint(100000000, 300000000),
                      retention_ms=random.randint(1 * 3600000, 10 * 3600000))
            for _ in range(10)
        ]
        self.source_client.create_topic(topics)
        self.mirror_maker.start()
        # start source producer & target consumer
        self.start_workload()

        self.run_validation(consumer_timeout_sec=120)
        self.mirror_maker.stop()
        target_client = DefaultClient(self.redpanda)
        for t in topics:
            desc = target_client.describe_topic(t.name)
            self.logger.debug(f'source topic: {t}, target topic: {desc}')
            assert len(desc.partitions) == t.partition_count

    @cluster(num_nodes=9)
    @parametrize(source_type=kafka_source)
    @parametrize(source_type=redpanda_source)
    def test_consumer_group_mirroring(self, source_type):
        """
        Produce to and consume from the source cluster with a consumer
        group, then wait until describing that group on the target cluster
        yields the same name and partitions as on the source.
        """
        # start source and target brokers
        self.start_brokers(source_type=source_type)
        consumer_group = "test-group-1"
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda,
                                         consumer_group_pattern=consumer_group,
                                         log_level="TRACE")
        self.mirror_maker.start()

        msg_size = 512
        msg_cnt = 1000000 if self.redpanda.dedicated_nodes else 100

        # produce some messages to the source cluster
        producer = RpkProducer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               msg_size,
                               msg_cnt,
                               acks=-1)

        producer.start()
        producer.wait()
        producer.free()

        # consume a fifth of the messages from the source cluster
        consumer = RpkConsumer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               ignore_errors=False,
                               retries=3,
                               group=consumer_group,
                               save_msgs=False,
                               num_msgs=msg_cnt // 5)

        consumer.start()
        consumer.wait()
        consumer.stop()
        source_messages = consumer.messages
        self.logger.info(f"source message count: {len(source_messages)}")
        consumer.free()

        src_rpk = RpkTool(self.source_broker)
        source_group = src_rpk.group_describe(consumer_group)
        target_rpk = RpkTool(self.redpanda)

        def target_group_equal():
            try:
                target_group = target_rpk.group_describe(consumer_group)
            except RpkException as e:
                # e.g. COORDINATOR_NOT_AVAILABLE
                self.logger.info(f"Error describing target cluster group: {e}")
                return False

            self.logger.info(
                f"source {source_group}, target_group: {target_group}")
            return (target_group.partitions == source_group.partitions
                    and target_group.name == source_group.name)

        # wait for consumer group sync
        timeout = 600 if self.redpanda.dedicated_nodes else 60
        wait_until(target_group_equal, timeout_sec=timeout, backoff_sec=5)

        self.mirror_maker.stop()