Example #1
 def _start_mconsumer(self, materialized_topic):
     self.mconsumer = VerifiableConsumer(
         self.test_context,
         num_nodes=1,
         redpanda=self.redpanda,
         topic=materialized_topic,
         group_id="test_group",
         on_record_consumed=self._on_data_consumed)
     self.mconsumer.start()
Example #2
 def start_consumer(self, num_nodes=1, group_id="test_group"):
     assert self.redpanda
     assert self.topic
     self.consumer = VerifiableConsumer(
         self.test_context,
         num_nodes=num_nodes,
         redpanda=self.redpanda,
         topic=self.topic,
         group_id=group_id,
         on_record_consumed=self.on_record_consumed)
     self.consumer.start()
Example #3
    def start_workload(self):

        self.consumer = VerifiableConsumer(
            self.test_context,
            num_nodes=1,
            redpanda=self.redpanda,
            topic=self.topic.name,
            group_id='consumer_test_group',
            on_record_consumed=self.on_record_consumed)
        self.consumer.start()

        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=1,
            redpanda=self.source_broker,
            topic=self.topic.name,
            throughput=1000,
            message_validator=is_int_with_prefix)
        self.producer.start()
Example #4
class EndToEndTest(Test):
    """
    Test for common pattern:
      - Produce and consume in the background
      - Perform some action (e.g. partition movement)
      - Run validation
    """
    def __init__(self, test_context, extra_rp_conf=None):
        super(EndToEndTest, self).__init__(test_context=test_context)
        if extra_rp_conf is None:
            self._extra_rp_conf = {}
        else:
            self._extra_rp_conf = extra_rp_conf
        self.records_consumed = []
        self.last_consumed_offsets = {}
        self.redpanda = None
        self.topic = None
        self._client = None

    def start_redpanda(self, num_nodes=1, extra_rp_conf=None):
        if extra_rp_conf is not None:
            # merge both configurations; the extra_rp_conf passed as a
            # parameter takes precedence
            self._extra_rp_conf = {**self._extra_rp_conf, **extra_rp_conf}
        assert self.redpanda is None
        self.redpanda = RedpandaService(self.test_context,
                                        num_nodes,
                                        extra_rp_conf=self._extra_rp_conf)
        self.redpanda.start()
        self._client = DefaultClient(self.redpanda)

    def client(self):
        assert self._client is not None
        return self._client

    def start_consumer(self, num_nodes=1, group_id="test_group"):
        assert self.redpanda
        assert self.topic
        self.consumer = VerifiableConsumer(
            self.test_context,
            num_nodes=num_nodes,
            redpanda=self.redpanda,
            topic=self.topic,
            group_id=group_id,
            on_record_consumed=self.on_record_consumed)
        self.consumer.start()

    def start_producer(self, num_nodes=1, throughput=1000):
        assert self.redpanda
        assert self.topic
        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=num_nodes,
            redpanda=self.redpanda,
            topic=self.topic,
            throughput=throughput,
            message_validator=is_int_with_prefix)
        self.producer.start()

    def on_record_consumed(self, record, node):
        partition = TopicPartition(record["topic"], record["partition"])
        record_id = record["value"]
        offset = record["offset"]
        self.last_consumed_offsets[partition] = offset
        self.records_consumed.append(record_id)

    def await_consumed_offsets(self, last_acked_offsets, timeout_sec):
        def has_finished_consuming():
            for partition, offset in last_acked_offsets.items():
                if partition not in self.last_consumed_offsets:
                    return False
                last_commit = self.consumer.last_commit(partition)
                if not last_commit or last_commit <= offset:
                    self.logger.debug(
                        f"waiting for partition {partition} offset {offset} to be committed, last committed offset: {last_commit}"
                    )
                    return False
            return True

        wait_until(has_finished_consuming,
                   timeout_sec=timeout_sec,
                   err_msg="Consumer failed to consume up to offsets %s after waiting %ds." %\
                   (str(last_acked_offsets), timeout_sec))

    def _collect_all_logs(self):
        for s in self.test_context.services:
            self.mark_for_collect(s)

    def await_startup(self, min_records=5, timeout_sec=30):
        try:
            wait_until(lambda: self.consumer.total_consumed() >= min_records,
                       timeout_sec=timeout_sec,
                       err_msg="Timed out after %ds while awaiting initial record delivery of %d records" %\
                       (timeout_sec, min_records))
        except BaseException:
            self._collect_all_logs()
            raise

    def run_validation(self,
                       min_records=5000,
                       producer_timeout_sec=30,
                       consumer_timeout_sec=30,
                       enable_idempotence=False):
        try:
            wait_until(lambda: self.producer.num_acked > min_records,
                       timeout_sec=producer_timeout_sec,
                       err_msg="Producer failed to produce messages for %ds." %\
                       producer_timeout_sec)

            self.logger.info("Stopping producer after writing up to offsets %s" %\
                         str(self.producer.last_acked_offsets))
            self.producer.stop()

            self.await_consumed_offsets(self.producer.last_acked_offsets,
                                        consumer_timeout_sec)
            self.consumer.stop()

            self.validate(enable_idempotence)
        except BaseException:
            self._collect_all_logs()
            raise

    def validate(self, enable_idempotence):
        self.logger.info("Number of acked records: %d" %
                         len(self.producer.acked))
        self.logger.info("Number of consumed records: %d" %
                         len(self.records_consumed))

        success = True
        msg = ""

        # Correctness of the set difference operation depends on using equivalent
        # message_validators in producer and consumer
        missing = set(self.producer.acked) - set(self.records_consumed)

        if len(missing) > 0:
            success = False
            msg = annotate_missing_msgs(missing, self.producer.acked,
                                        self.records_consumed, msg)

        # Are there duplicates?
        if len(set(self.records_consumed)) != len(self.records_consumed):
            num_duplicates = abs(
                len(set(self.records_consumed)) - len(self.records_consumed))

            if enable_idempotence:
                success = False
                msg += "Detected %d duplicates even though idempotence was enabled.\n" % num_duplicates
            else:
                msg += "(There are also %d duplicate messages in the log - but that is an acceptable outcome)\n" % num_duplicates

        # Collect all logs if validation fails
        if not success:
            self._collect_all_logs()

        assert success, msg
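
The EndToEndTest harness above is meant to be subclassed: a test brings up a cluster, starts the background producer and consumer, performs some action, and then calls run_validation(). A minimal sketch of that pattern is shown below; the rptest module paths, the ExampleRestartTest class name, and the test name are assumptions for illustration, while the harness methods are exactly the ones defined above.

# Hedged sketch of a test built on the EndToEndTest harness above.
# The module paths and the class/test names are illustrative assumptions.
from rptest.services.cluster import cluster
from rptest.clients.types import TopicSpec
from rptest.tests.end_to_end import EndToEndTest


class ExampleRestartTest(EndToEndTest):
    @cluster(num_nodes=5)
    def test_restart_preserves_acked_records(self):
        # 3 redpanda nodes + 1 producer node + 1 consumer node
        self.start_redpanda(num_nodes=3)
        spec = TopicSpec(partition_count=3, replication_factor=3)
        self.client().create_topic(spec)
        self.topic = spec.name

        # produce and consume in the background
        self.start_producer(num_nodes=1, throughput=1000)
        self.start_consumer(num_nodes=1)
        self.await_startup(min_records=100)

        # perform some action, e.g. restart one node
        self.redpanda.restart_nodes(self.redpanda.nodes[0])

        # every acked record must have been consumed
        self.run_validation(min_records=1000, consumer_timeout_sec=90)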
Example #5
class WasmPartitionMovementTest(PartitionMovementMixin, EndToEndTest):
    """
    Tests to ensure that the materialized partition movement feature
    is working as expected. With this feature, materialized topic partitions
    move to wherever their respective source partitions are moved.
    """
    def __init__(self, ctx, *args, **kvargs):
        super(WasmPartitionMovementTest, self).__init__(
            ctx,
            *args,
            extra_rp_conf={
                # Disable leader balancer, as this test is doing its own
                # partition movement and the balancer would interfere
                'enable_leader_balancer': False,
                'enable_coproc': True,
                'developer_mode': True,
                'auto_create_topics_enabled': False
            },
            **kvargs)
        self._ctx = ctx
        self._rpk_tool = None
        self._build_tool = None
        self.result_data = []
        self.mconsumer = None

    def start_redpanda_nodes(self, nodes):
        self.start_redpanda(num_nodes=nodes)
        self._rpk_tool = RpkTool(self.redpanda)
        self._build_tool = WasmBuildTool(self._rpk_tool)

    def restart_random_redpanda_node(self):
        node = random.sample(self.redpanda.nodes, 1)[0]
        self.logger.info(f"Randomly killing redpanda node: {node.name}")
        self.redpanda.restart_nodes(node)

    def _deploy_identity_copro(self, inputs, outputs):
        script = WasmScript(inputs=inputs,
                            outputs=outputs,
                            script=WasmTemplateRepository.IDENTITY_TRANSFORM)

        self._build_tool.build_test_artifacts(script)

        self._rpk_tool.wasm_deploy(
            script.get_artifact(self._build_tool.work_dir), script.name,
            "ducktape")

    def _verify_materialized_assignments(self, topic, partition, assignments):
        admin = Admin(self.redpanda)
        massignments = self._get_assignments(admin, topic, partition)
        self.logger.info(
            f"materialized assignments for {topic}-{partition}: {massignments}"
        )

        self._wait_post_move(topic, partition, assignments)

    def _grab_input(self, topic):
        metadata = self.client().describe_topics()
        selected = [x for x in metadata if x['topic'] == topic]
        assert len(selected) == 1
        partition = random.choice(selected[0]["partitions"])
        return selected[0]["topic"], partition["partition"]

    def _on_data_consumed(self, record, node):
        self.result_data.append(record["value"])

    def _start_mconsumer(self, materialized_topic):
        self.mconsumer = VerifiableConsumer(
            self.test_context,
            num_nodes=1,
            redpanda=self.redpanda,
            topic=materialized_topic,
            group_id="test_group",
            on_record_consumed=self._on_data_consumed)
        self.mconsumer.start()

    def _await_consumer(self, limit, timeout):
        wait_until(
            lambda: self.mconsumer.total_consumed() >= limit,
            timeout_sec=timeout,
            err_msg=
            "Timeout of after %ds while awaiting delivery of %d materialized records, recieved: %d"
            % (timeout, limit, self.mconsumer.total_consumed()))
        self.mconsumer.stop()

    def _prime_env(self):
        output_topic = "identity_output2"
        self.start_redpanda_nodes(3)
        spec = TopicSpec(name="topic2",
                         partition_count=3,
                         replication_factor=3)
        self.client().create_topic(spec)
        self._deploy_identity_copro([spec.name], [output_topic])
        self.topic = spec.name
        self.start_producer(num_nodes=1, throughput=10000)
        self.start_consumer(1)
        self.await_startup(min_records=500)
        materialized_topic = construct_materialized_topic(
            spec.name, output_topic)

        def topic_created():
            metadata = self.client().describe_topics()
            self.logger.info(f"metadata: {metadata}")
            return any([x['topic'] == materialized_topic for x in metadata])

        wait_until(topic_created, timeout_sec=30, backoff_sec=2)

        self._start_mconsumer(materialized_topic)
        t, p = self._grab_input(spec.name)
        return {
            'topic': t,
            'partition': p,
            'materialized_topic': materialized_topic
        }

    @cluster(num_nodes=6)
    def test_dynamic(self):
        """
        Move partitions with active consumer / producer
        """
        s = self._prime_env()
        for _ in range(5):
            _, partition, assignments = self._do_move_and_verify(
                s['topic'], s['partition'])
            self._verify_materialized_assignments(s['materialized_topic'],
                                                  partition, assignments)

        # Validate input
        self.run_validation(min_records=500,
                            enable_idempotence=False,
                            consumer_timeout_sec=90)

        # Wait for all output
        self._await_consumer(self.consumer.total_consumed(), 90)

        # Validate number of records & their content
        self.logger.info(
            f'A: {self.mconsumer.total_consumed()} B: {self.consumer.total_consumed()}'
        )
        assert self.mconsumer.total_consumed() == self.consumer.total_consumed()
        # Since the identity copro was deployed, contents of logs should be identical
        assert set(self.records_consumed) == set(self.result_data)

    @cluster(num_nodes=6)
    def test_dynamic_with_failure(self):
        s = self._prime_env()
        _, partition, assignments = self._do_move_and_verify(
            s['topic'], s['partition'])

        # Restart a random node before verifying; it has a fixed number of
        # seconds to come back
        n = random.sample(self.redpanda.nodes, 1)[0]
        self.redpanda.restart_nodes(n)

        self._verify_materialized_assignments(s['materialized_topic'],
                                              partition, assignments)

        self.run_validation(min_records=500,
                            enable_idempotence=False,
                            consumer_timeout_sec=90)

        # Wait for all acked records to show up in the output. This is significant because,
        # due to the failure, the number of actually produced records may be less than expected
        num_producer_acked = len(self.producer.acked)
        self._await_consumer(num_producer_acked, 90)

        # >= because, after an error, the copro may have re-processed already-processed
        # data, depending on when offsets were checkpointed
        assert self.mconsumer.total_consumed() >= num_producer_acked
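
The class docstring above says that materialized topic partitions follow their source partitions when those are moved. One direct way to check that convergence is sketched below, assuming the Admin API's get_partitions(topic, partition) call returns the replica assignment (the same data the PartitionMovementMixin helpers rely on); wait_for_colocated_assignments is a hypothetical helper, not part of the test suite.

# Hedged sketch: poll until a materialized partition reports the same replica
# assignment as its source partition. Assumes Admin.get_partitions() returns a
# dict whose "replicas" entries carry a "node_id" field.
from ducktape.utils.util import wait_until
from rptest.services.admin import Admin


def wait_for_colocated_assignments(redpanda, source_topic, materialized_topic,
                                   partition, timeout_sec=90):
    admin = Admin(redpanda)

    def assignment(topic):
        replicas = admin.get_partitions(topic, partition)["replicas"]
        return sorted(r["node_id"] for r in replicas)

    def converged():
        return assignment(materialized_topic) == assignment(source_topic)

    wait_until(converged,
               timeout_sec=timeout_sec,
               backoff_sec=2,
               err_msg=f"{materialized_topic}/{partition} did not follow "
               f"{source_topic}/{partition}")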
Example #6
class TestMirrorMakerService(EndToEndTest):
    kafka_source = "kafka"
    redpanda_source = "redpanda"

    def __init__(self, test_context):
        super(TestMirrorMakerService, self).__init__(test_context)

        self.topic = TopicSpec(replication_factor=3)
        # create a single ZooKeeper node for Kafka
        self.zk = ZookeeperService(self.test_context,
                                   num_nodes=1,
                                   version=V_3_0_0)
        self.source_broker = None

    def setUp(self):
        self.zk.start()

    def tearDown(self):
        # ducktape handles service teardown automatically, but it is hard
        # to tell what went wrong if one of the services hangs.  Do it
        # explicitly here with some logging to make it easier to debug issues
        # like https://github.com/redpanda-data/redpanda/issues/4270

        if self.source_broker is not None:
            self.logger.info(
                f"Stopping source broker ({self.source_broker.__class__.__name__})..."
            )
            self.source_broker.stop()
            self.logger.info(
                f"Awaiting source broker ({self.source_broker.__class__.__name__})..."
            )

        self.logger.info("Stopping zookeeper...")
        self.zk.stop()
        self.logger.info("Awaiting zookeeper...")

    def start_brokers(self, source_type=kafka_source):
        if source_type == TestMirrorMakerService.redpanda_source:
            self.source_broker = RedpandaService(self.test_context,
                                                 num_brokers=3)
        else:
            self.source_broker = KafkaServiceAdapter(
                self.test_context,
                KafkaService(self.test_context,
                             num_nodes=3,
                             zk=self.zk,
                             version=V_3_0_0))

        self.redpanda = RedpandaService(self.test_context, num_brokers=3)
        self.source_broker.start()
        self.redpanda.start()

        self.source_client = DefaultClient(self.source_broker)

        self.topic.partition_count = 1000 if self.redpanda.dedicated_nodes else 1
        self.source_client.create_topic(self.topic)

    def start_workload(self):

        self.consumer = VerifiableConsumer(
            self.test_context,
            num_nodes=1,
            redpanda=self.redpanda,
            topic=self.topic.name,
            group_id='consumer_test_group',
            on_record_consumed=self.on_record_consumed)
        self.consumer.start()

        self.producer = VerifiableProducer(
            self.test_context,
            num_nodes=1,
            redpanda=self.source_broker,
            topic=self.topic.name,
            throughput=1000,
            message_validator=is_int_with_prefix)
        self.producer.start()

    def wait_for_n_messages(self, n_messages=100):
        """Wait for a minimum number of messages to be successfully produced."""
        wait_until(
            lambda: self.producer.num_acked > n_messages,
            timeout_sec=10,
            err_msg=
            "Producer failed to produce %d messages in a reasonable amount of time."
            % n_messages)

    @cluster(num_nodes=10)
    @parametrize(source_type=kafka_source)
    @parametrize(source_type=redpanda_source)
    def test_simple_end_to_end(self, source_type):
        # start brokers
        self.start_brokers(source_type=source_type)
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda)
        topics = []
        for i in range(0, 10):
            topics.append(
                TopicSpec(partition_count=random.randint(1, 10),
                          retention_bytes=random.randint(100000000, 300000000),
                          retention_ms=random.randint(1 * 3600000,
                                                      10 * 3600000)))
        self.source_client.create_topic(topics)
        self.mirror_maker.start()
        # start source producer & target consumer
        self.start_workload()

        self.run_validation(consumer_timeout_sec=120)
        self.mirror_maker.stop()
        target_client = DefaultClient(self.redpanda)
        for t in topics:
            desc = target_client.describe_topic(t.name)
            self.logger.debug(f'source topic: {t}, target topic: {desc}')
            assert len(desc.partitions) == t.partition_count

    @cluster(num_nodes=9)
    @parametrize(source_type=kafka_source)
    @parametrize(source_type=redpanda_source)
    def test_consumer_group_mirroring(self, source_type):
        # start source and target brokers
        self.start_brokers(source_type=source_type)
        consumer_group = "test-group-1"
        # start mirror maker
        self.mirror_maker = MirrorMaker2(self.test_context,
                                         num_nodes=1,
                                         source_cluster=self.source_broker,
                                         target_cluster=self.redpanda,
                                         consumer_group_pattern=consumer_group,
                                         log_level="TRACE")
        self.mirror_maker.start()

        msg_size = 512
        msg_cnt = 1000000 if self.redpanda.dedicated_nodes else 100

        # produce some messages to source redpanda
        producer = RpkProducer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               msg_size,
                               msg_cnt,
                               acks=-1)

        producer.start()
        producer.wait()
        producer.free()

        # consume some messages from source redpanda
        consumer = RpkConsumer(self.test_context,
                               self.source_broker,
                               self.topic.name,
                               ignore_errors=False,
                               retries=3,
                               group=consumer_group,
                               save_msgs=False,
                               num_msgs=int(msg_cnt / 5))

        consumer.start()
        consumer.wait()
        consumer.stop()
        source_messages = consumer.messages
        self.logger.info(f"source message count: {len(source_messages)}")
        consumer.free()

        src_rpk = RpkTool(self.source_broker)
        source_group = src_rpk.group_describe(consumer_group)
        target_rpk = RpkTool(self.redpanda)

        def target_group_equal():
            try:
                target_group = target_rpk.group_describe(consumer_group)
            except RpkException as e:
                # e.g. COORDINATOR_NOT_AVAILABLE
                self.logger.info(f"Error describing target cluster group: {e}")
                return False

            self.logger.info(
                f"source {source_group}, target_group: {target_group}")
            return target_group.partitions == source_group.partitions and target_group.name == source_group.name

        # wait for consumer group sync
        timeout = 600 if self.redpanda.dedicated_nodes else 60
        wait_until(target_group_equal, timeout_sec=timeout, backoff_sec=5)

        self.mirror_maker.stop()