def test_reads_writes(self):
        """
        Run the tx-verifier "concurrent-reads-writes" scenario.

        Creates "topic1" with one partition and one replica, then invokes the
        verifier jar through ``/bin/sh -c`` with the cluster's broker list.

        Raises:
            DucktapeError: if the verifier exits with a non-zero status. The
                message holds the scenario name followed by the captured
                output (stdout and stderr combined).
        """
        scenario = "concurrent-reads-writes"
        jar_path = "/opt/tx-verifier/tx-verifier.jar"
        log = self.redpanda.logger

        log.info("creating topics")
        RpkTool(self.redpanda).create_topic("topic1", partitions=1, replicas=1)

        try:
            shell_cmd = (f"java -jar {jar_path} {scenario} "
                         f"{self.redpanda.brokers()}")
            # stderr is merged into stdout so a failure report is complete.
            subprocess.check_output(["/bin/sh", "-c", shell_cmd],
                                    stderr=subprocess.STDOUT)
            log.info(f'txn test "{scenario}" passed')
        except subprocess.CalledProcessError as e:
            log.info(f'txn test "{scenario}" failed')
            report = (scenario + "\n" + str(e.output) + "\n" +
                      "---------------------------\n")
            raise DucktapeError(report)
    def test_produce(self):
        """
        Run the sarama produce_test binary against the cluster.

        Creates "topic1" and invokes the verifier through ``/bin/sh -c``.
        A failed run is retried (after a 5 second pause) only while attempts
        remain and the captured output contains NOT_LEADER_FOR_PARTITION.

        Raises:
            DucktapeError: on a failure that is not retried, carrying the
                captured output of the failing run.
        """
        produce_test_path = "/opt/redpanda-tests/go/sarama/produce_test/produce_test"
        log = self.redpanda.logger

        log.info("creating topics")
        RpkTool(self.redpanda).create_topic("topic1")

        log.info("testing sarama produce")
        max_attempts = 5
        for attempt in range(1, max_attempts + 1):
            try:
                shell_cmd = (f"{produce_test_path} --brokers "
                             f"{self.redpanda.brokers()}")
                subprocess.check_output(["/bin/sh", "-c", shell_cmd],
                                        stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError as e:
                failure = str(e.output)
                log.info("sarama produce failed with " + failure)
                out_of_attempts = attempt == max_attempts
                if out_of_attempts or NOT_LEADER_FOR_PARTITION not in failure:
                    raise DucktapeError("sarama produce failed with " +
                                        failure)
                # Leadership was not settled yet: pause, then try again.
                sleep(5)
            else:
                log.info("sarama produce test passed")
                break
Beispiel #3
0
    def verify(self, tests):
        """
        Run each named tx-verifier scenario and report all failures together.

        Creates "topic1" and "topic2", then runs the verifier jar once per
        entry in ``tests``. Failures do not stop the loop: every scenario is
        attempted, and the outputs of the failed ones are accumulated.

        Args:
            tests: iterable of scenario names passed to the verifier jar.

        Raises:
            DucktapeError: after all scenarios have run, if at least one
                failed. The message lists each failed scenario and its output.
        """
        jar_path = "/opt/tx-verifier/tx-verifier.jar"
        log = self.redpanda.logger

        log.info("creating topics")
        topic_tool = RpkTool(self.redpanda)
        for topic_name in ("topic1", "topic2"):
            topic_tool.create_topic(topic_name)

        failure_reports = []
        for test in tests:
            log.info(f'testing txn test "{test}"')
            try:
                shell_cmd = (f"java -jar {jar_path} {test} "
                             f"{self.redpanda.brokers()}")
                subprocess.check_output(["/bin/sh", "-c", shell_cmd],
                                        stderr=subprocess.STDOUT)
                log.info(f'txn test "{test}" passed')
            except subprocess.CalledProcessError as e:
                log.info(f'txn test "{test}" failed')
                failure_reports.append(test + "\n" + str(e.output) + "\n" +
                                       "---------------------------\n")

        if failure_reports:
            raise DucktapeError("".join(failure_reports))
Beispiel #4
0
    def test_tx_init_passes(self):
        """
        Check that a transactional producer can initialise transactions.

        Creates "topic1", builds an idempotent producer with a transactional
        id and calls ``init_transactions()``; nothing is asserted beyond the
        call not raising.
        """
        RpkTool(self.redpanda).create_topic("topic1")

        tx_producer_conf = {
            "bootstrap.servers": self.redpanda.brokers(),
            "enable.idempotence": True,
            "transactional.id": "tx-id-1",
            "retries": 5,
        }
        tx_producer = Producer(tx_producer_conf)
        tx_producer.init_transactions()
Beispiel #5
0
    def test_idempotent_write_passes(self):
        """
        Check that an idempotent producer can write a single record.

        Creates "topic1", produces one key/value pair with idempotence
        enabled and flushes; delivery is reported to ``on_delivery``.
        """
        RpkTool(self.redpanda).create_topic("topic1")

        idempotent_conf = {
            "bootstrap.servers": self.redpanda.brokers(),
            "enable.idempotence": True,
            "retries": 5,
        }
        writer = Producer(idempotent_conf)

        record_key = "key1".encode('utf-8')
        record_value = "value1".encode('utf-8')
        writer.produce("topic1",
                       key=record_key,
                       value=record_value,
                       callback=on_delivery)
        writer.flush()
Beispiel #6
0
    def test_idempotency_compacted_topic(self):
        """
        Check that an idempotent producer can write to a compacted topic.

        Creates "topic1" with ``cleanup.policy=compact``, produces one
        key/value pair with idempotence enabled and flushes; delivery is
        reported to ``on_delivery``.
        """
        topic_config = {"cleanup.policy": "compact"}
        RpkTool(self.redpanda).create_topic("topic1", config=topic_config)

        idempotent_conf = {
            "bootstrap.servers": self.redpanda.brokers(),
            "enable.idempotence": True,
            "retries": 5,
        }
        writer = Producer(idempotent_conf)

        record_key = "key1".encode('utf-8')
        record_value = "value1".encode('utf-8')
        writer.produce("topic1",
                       key=record_key,
                       value=record_value,
                       callback=on_delivery)
        writer.flush()
Beispiel #7
0
class TopicAutocreateTest(RedpandaTest):
    """
    Verify that autocreation works, and that the settings of an autocreated
    topic match those for a topic created by hand with rpk.
    """
    def __init__(self, test_context):
        """
        Start from a single-broker cluster with topic autocreation disabled,
        so the test can first check the "disabled" behaviour.
        """
        super(TopicAutocreateTest, self).__init__(
            test_context=test_context,
            num_brokers=1,
            extra_rp_conf={'auto_create_topics_enabled': False})

        self.kafka_tools = KafkaCliTools(self.redpanda)
        self.rpk = RpkTool(self.redpanda)

    @cluster(num_nodes=1)
    def topic_autocreate_test(self):
        """
        Exercise autocreation off, then on, then compare against a topic
        created explicitly with rpk.
        """
        auto_topic = 'autocreated'
        manual_topic = "manuallycreated"

        # With autocreation disabled, producing to a nonexistent topic should not work.
        try:
            # Use rpk rather than kafka CLI because rpk errors out promptly
            self.rpk.produce(auto_topic, "foo", "bar")
        except Exception:
            # The write failed, and shouldn't have created a topic
            assert auto_topic not in self.kafka_tools.list_topics()
        else:
            assert False, "Producing to a nonexistent topic should fail"

        # Enable autocreation
        self.redpanda.restart_nodes(self.redpanda.nodes,
                                    {'auto_create_topics_enabled': True})

        # Auto create topic
        assert auto_topic not in self.kafka_tools.list_topics()
        self.kafka_tools.produce(auto_topic, 1, 4096)
        assert auto_topic in self.kafka_tools.list_topics()
        auto_topic_spec = self.kafka_tools.describe_topic(auto_topic)
        assert auto_topic_spec.retention_ms is None
        assert auto_topic_spec.retention_bytes is None

        # Create topic by hand, compare its properties to the autocreated one.
        # The spec must be read from the manually created topic: describing
        # auto_topic here would compare the autocreated topic with itself and
        # make every assertion below trivially true.
        self.rpk.create_topic(manual_topic)
        manual_topic_spec = self.kafka_tools.describe_topic(manual_topic)
        assert manual_topic_spec.retention_ms == auto_topic_spec.retention_ms
        assert manual_topic_spec.retention_bytes == auto_topic_spec.retention_bytes

        # Clear name and compare the rest of the attributes
        manual_topic_spec.name = auto_topic_spec.name = None
        assert manual_topic_spec == auto_topic_spec
Beispiel #8
0
    def test_tx(self):
        """
        Run the tx-verifier jar against the cluster.

        Creates "topic1" and "topic2", then invokes the verifier through
        ``/bin/sh -c`` with the cluster's broker list.

        Raises:
            DucktapeError: if the verifier exits with a non-zero status; the
                message carries the captured output (stdout and stderr).
        """
        verifier_jar = "/opt/tx-verifier/tx-verifier.jar"

        rpk = RpkTool(self.redpanda)
        rpk.create_topic("topic1")
        rpk.create_topic("topic2")

        # Routine progress message: logged at info level (not error) so that
        # a passing run does not leave error entries in the test log.
        self.redpanda.logger.info("starting tx verifier")
        try:
            cmd = ("{java} -jar {verifier_jar} {brokers}").format(
                java="java",
                verifier_jar=verifier_jar,
                brokers=self.redpanda.brokers())
            subprocess.check_output(["/bin/sh", "-c", cmd],
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise DucktapeError("tx test failed: " + str(e.output)) from e
Beispiel #9
0
 def _restore_topic(self, topic_spec, overrides=None):
     """Restore individual topic.

     Re-creates the topic described by ``topic_spec`` with
     ``redpanda.remote.recovery`` enabled, waits 10 seconds, then runs the
     rpk describe commands for the topic and its configs (their results are
     not used here).

     Args:
         topic_spec: object providing ``name``, ``partition_count`` and
             ``replication_factor``.
         overrides: optional mapping of topic properties merged over the
             default recovery configuration. ``None`` means no overrides.
     """
     # Avoid a shared mutable default argument; None stands for "no overrides".
     if overrides is None:
         overrides = {}
     self.logger.info(f"Restore topic called. Topic-manifest: {topic_spec}")
     conf = {
         'redpanda.remote.recovery': 'true',
         #'redpanda.remote.write': 'true',
     }
     conf.update(overrides)
     self.logger.info(f"Confg: {conf}")
     topic = topic_spec.name
     npart = topic_spec.partition_count
     nrepl = topic_spec.replication_factor
     rpk = RpkTool(self.redpanda)
     rpk.create_topic(topic, npart, nrepl, conf)
     time.sleep(10)
     rpk.describe_topic(topic)
     rpk.describe_topic_configs(topic)
    def test_memory_limited(self):
        """
        Check enforcement of the RAM-per-partition threshold
        """
        def must_be_rejected(operation):
            # The operation has to fail with an RpkException that names
            # INVALID_PARTITIONS; succeeding is a test failure.
            try:
                operation()
            except RpkException as e:
                assert 'INVALID_PARTITIONS' in e.msg
            else:
                assert False

        self.redpanda.set_resource_settings(
            ResourceSettings(memory_mb=1024, num_cpus=1))

        # Use a larger than default memory per partition, so that a 1GB system can be
        # tested without creating 1000 partitions (which overwhelms debug redpanda
        # builds because they're much slower than the real product)
        memory_per_partition = 10 * 1024 * 1024
        self.redpanda.set_extra_rp_conf(
            {'topic_memory_per_partition': memory_per_partition})

        self.redpanda.start()

        rpk = RpkTool(self.redpanda)

        # Each node is given 1024MB and every partition needs 10MiB, so 110
        # partitions at replicas=3 is expected to exceed the limit.
        # NOTE(review): the exact limit depends on the cluster size and on
        # reserved memory, neither of which is visible here - confirm.
        must_be_rejected(
            lambda: rpk.create_topic("toobig", partitions=110, replicas=3))

        # Should succeed
        rpk.create_topic("okay", partitions=55, replicas=3)

        # Trying to grow the partition count in violation of the limit should fail
        must_be_rejected(lambda: rpk.add_topic_partitions("okay", 55))

        # Growing the partition count within the limit should succeed
        rpk.add_topic_partitions("okay", 10)
    def test_fd_limited(self):
        """
        Check enforcement of the file-descriptors-per-partition threshold
        """
        fd_limited = ResourceSettings(nfiles=1000)
        self.redpanda.set_resource_settings(fd_limited)

        # Disable memory limit: on a test node the physical memory can easily
        # be the limiting factor
        self.redpanda.set_extra_rp_conf({'topic_memory_per_partition': None})
        self.redpanda.start()

        rpk = RpkTool(self.redpanda)

        # Default 10 fds per partition, we set ulimit down to 1000, so 100 should be the limit
        oversized_rejected = False
        try:
            rpk.create_topic("toobig", partitions=110, replicas=3)
        except RpkException as e:
            assert 'INVALID_PARTITIONS' in e.msg
            oversized_rejected = True
        # Creating the oversized topic without an error is a test failure.
        assert oversized_rejected

        # Should succeed
        rpk.create_topic("okay", partitions=90, replicas=3)
    def test_cpu_limited(self):
        """
        Check enforcement of the partitions-per-core
        """
        single_core = ResourceSettings(num_cpus=1)
        self.redpanda.set_resource_settings(single_core)

        unrelated_limits_off = {
            # Disable memory limit: on a test node the physical memory can easily
            # be the limiting factor
            'topic_memory_per_partition': None,
            # Disable FD enforcement: tests running on workstations may have low ulimits
            'topic_fds_per_partition': None,
        }
        self.redpanda.set_extra_rp_conf(unrelated_limits_off)

        self.redpanda.start()

        rpk = RpkTool(self.redpanda)

        # Three nodes, each with 1 core, 7000 partition-replicas
        # per core, so with replicas=3, 7000 partitions should be the limit
        oversized_rejected = False
        try:
            rpk.create_topic("toobig", partitions=8000, replicas=3)
        except RpkException as e:
            assert 'INVALID_PARTITIONS' in e.msg
            oversized_rejected = True
        # Creating the oversized topic without an error is a test failure.
        assert oversized_rejected

        try:
            rpk.create_topic("okay", partitions=6000, replicas=3)
        except RpkException as e:
            # Because this many partitions will overwhelm a debug build
            # of redpanda, we tolerate exceptions, as long as the exception
            # isn't about the partition count specifically.
            #
            # It would be better to execute this part of the test conditionally
            # on release builds only.
            assert 'INVALID_PARTITIONS' not in e.msg
Beispiel #13
0
class ManyPartitionsTest(PreallocNodesTest):
    """
    Validates basic functionality in the presence of larger numbers
    of partitions than most other tests.
    """
    topics = ()

    def __init__(self, test_ctx, *args, **kwargs):
        """
        Configure a 6-broker cluster plus one preallocated node, with the
        leader balancer disabled and info-level logging.
        """
        self._ctx = test_ctx
        super(ManyPartitionsTest, self).__init__(
            test_ctx,
            *args,
            num_brokers=6,
            node_prealloc_count=1,
            extra_rp_conf={
                # Disable leader balancer initially, to enable us to check for
                # stable leadership during initial elections and post-restart
                # elections.  We will switch it on later, to exercise it during
                # the traffic stress test.
                'enable_leader_balancer': False,
            },
            # Usually tests run with debug or trace logs, but when testing resource
            # limits we want to test in a more production-like configuration.
            log_level='info',
            **kwargs)
        self.rpk = RpkTool(self.redpanda)

    def _all_elections_done(self, topic_names: list[str], p_per_topic: int):
        """
        Return True when every topic reports all of its partitions and each
        reported partition has a leader (leader != -1).

        Every topic is inspected (no early exit), so each incomplete
        partition is logged on every call.
        """
        any_incomplete = False
        for tn in topic_names:
            partitions = list(self.rpk.describe_topic(tn))
            if len(partitions) < p_per_topic:
                self.logger.info(f"describe omits partitions for topic {tn}")
                any_incomplete = True
                continue

            assert len(partitions) == p_per_topic
            for p in partitions:
                if p.leader == -1:
                    self.logger.info(
                        f"partition {tn}/{p.id} has no leader yet")
                    any_incomplete = True

        return not any_incomplete

    def _consume_all(self, topic_names: list[str], msg_count_per_topic: int,
                     timeout_per_topic: int):
        """
        Don't do anything with the messages, just consume them to demonstrate
        that doing so does not exhaust redpanda resources.
        """
        def consumer_saw_msgs(consumer):
            self.logger.info(
                f"Consumer message_count={consumer.message_count} / {msg_count_per_topic}"
            )
            # Tolerate greater-than, because if there were errors during production
            # there can have been retries.
            return consumer.message_count >= msg_count_per_topic

        for tn in topic_names:
            consumer = RpkConsumer(self._ctx,
                                   self.redpanda,
                                   tn,
                                   save_msgs=False,
                                   fetch_max_bytes=BIG_FETCH,
                                   num_msgs=msg_count_per_topic)
            consumer.start()
            wait_until(lambda: consumer_saw_msgs(consumer),
                       timeout_sec=timeout_per_topic,
                       backoff_sec=5)
            consumer.stop()
            consumer.free()

    def setUp(self):
        # defer redpanda startup to the test, it might want to tweak
        # ResourceSettings based on its parameters.
        pass

    @cluster(num_nodes=7, log_allow_list=RESTART_LOG_ALLOW_LIST)
    def test_many_partitions(self):
        """
        Validate that redpanda works with partition counts close to its resource
        limits.

        This test should evolve over time as we improve efficiency and can reliably
        run with higher partition counts.  It should roughly track the values we
        use for topic_memory_per_partition and topic_fds_per_partition.

        * Check topic can be created.
        * Check leadership election succeeds for all partitions.
        * Write in enough data such that an unlimited size fetch
          would exhaust ram (check enforcement of kafka_max_bytes_per_fetch).
        * Consume all the data from the topic

        * Restart nodes several times (check that recovery works, and that the additional
          log segments created by rolling segments on restart do not cause us
          to exhaust resources.

        * Run a general produce+consume workload to check that the system remains in
          a functional state.
        """

        # This test requires dedicated system resources to run reliably.
        #assert self.redpanda.dedicated_nodes

        # Scale tests are not run on debug builds
        assert not self.debug_mode

        replication_factor = 3
        node_count = len(self.redpanda.nodes)

        # If we run on nodes with more memory than our HARD_PARTITION_LIMIT, then
        # artificially throttle the nodes' memory to avoid the test being too easy.
        # We are validating that the system works up to the limit, and that it works
        # up to the limit within the default per-partition memory footprint.
        node_memory = self.redpanda.get_node_memory_mb()

        # HARD_PARTITION_LIMIT is for a 3 node cluster, adjust according to
        # the number of nodes in this cluster.
        partition_limit = HARD_PARTITION_LIMIT * (node_count / 3)

        mb_per_partition = 1

        # How much memory to reserve for internal partitions, such as
        # id_allocator.  This is intentionally higher than needed, to
        # avoid having to update this test each time a new internal topic
        # is added.
        internal_partition_slack = 10

        # Emulate seastar's policy for default reserved memory
        reserved_memory = max(1536, int(0.07 * node_memory) + 1)
        effective_node_memory = node_memory - reserved_memory

        # TODO: calculate an appropriate segment size for the disk space divided
        # by the partition count, then set an appropriate retention.bytes and
        # enable compaction, so that during the final stress period of the test,
        # we are exercising compaction.

        # Clamp memory if nodes have more memory than should be required
        # to exercise the partition limit.
        if effective_node_memory > HARD_PARTITION_LIMIT / mb_per_partition:
            clamp_memory = mb_per_partition * (
                (HARD_PARTITION_LIMIT + internal_partition_slack) +
                reserved_memory)

            # Handy if hacking HARD_PARTITION_LIMIT to something low to run on a workstation
            clamp_memory = max(clamp_memory, 500)

            resource_settings = ResourceSettings(memory_mb=clamp_memory)
            self.redpanda.set_resource_settings(resource_settings)
        elif effective_node_memory < HARD_PARTITION_LIMIT / mb_per_partition:
            raise RuntimeError(
                f"Node memory is too small ({node_memory}MB - {reserved_memory}MB)"
            )

        # Run with one huge topic: this is the more stressful case for Redpanda, compared
        # with multiple modestly-sized topics, so it's what we test to find the system's limits.
        n_topics = 1

        # Partitions per topic
        n_partitions = int(partition_limit / n_topics)

        self.logger.info(
            f"Running partition scale test with {n_partitions} partitions on {n_topics} topics"
        )

        self.redpanda.start()

        self.logger.info("Entering topic creation")
        topic_names = [f"scale_{i:06d}" for i in range(0, n_topics)]
        for tn in topic_names:
            self.logger.info(
                f"Creating topic {tn} with {n_partitions} partitions")
            self.rpk.create_topic(tn,
                                  partitions=n_partitions,
                                  replicas=replication_factor)

        self.logger.info("Awaiting elections...")
        wait_until(lambda: self._all_elections_done(topic_names, n_partitions),
                   timeout_sec=60,
                   backoff_sec=5)
        self.logger.info("Initial elections done.")

        for node in self.redpanda.nodes:
            files = self.redpanda.lsof_node(node)
            file_count = sum(1 for _ in files)
            self.logger.info(
                f"Open files after initial selection on {node.name}: {file_count}"
            )

        # Assume fetches will be 10MB, the franz-go default (value is in bytes)
        fetch_bytes_per_partition = 10 * 1024 * 1024

        # * Need enough data that if a consumer tried to fetch it all at once
        # in a single request, it would run out of memory.  OR the amount of
        # data that would fill a 10MB max_bytes per partition in a fetch, whichever
        # is lower (avoid writing excessive data for tests with fewer partitions).
        # * Then apply a factor of two to make sure we have enough data to drive writes
        # to disk during consumption, not just enough data to hold it all in the batch
        # cache.
        write_bytes_per_topic = min(
            int((self.redpanda.get_node_memory_mb() * 1024 * 1024) / n_topics),
            fetch_bytes_per_partition * n_partitions) * 2

        if self.scale.release:
            # Release tests can be much longer running: 10x the amount of
            # data we fire through the system
            write_bytes_per_topic *= 10

        msg_size = 128 * 1024
        msg_count_per_topic = int((write_bytes_per_topic / msg_size))

        # Approx time to write or read all messages, for timeouts
        # Pessimistic bandwidth guess, accounting for the sub-disk bandwidth
        # that a single-threaded consumer may see
        expect_bandwidth = 50 * 1024 * 1024

        expect_transmit_time = int(write_bytes_per_topic / expect_bandwidth)
        expect_transmit_time = max(expect_transmit_time, 30)

        self.logger.info("Entering initial produce")
        for tn in topic_names:
            t1 = time.time()
            producer = FranzGoVerifiableProducer(
                self.test_context,
                self.redpanda,
                tn,
                msg_size,
                msg_count_per_topic,
                custom_node=self.preallocated_nodes)
            producer.start()
            producer.wait(timeout_sec=expect_transmit_time)
            self.free_preallocated_nodes()
            duration = time.time() - t1
            self.logger.info(
                f"Wrote {write_bytes_per_topic} bytes to {tn} in {duration}s, bandwidth {(write_bytes_per_topic / duration)/(1024 * 1024)}MB/s"
            )

        def get_fd_counts():
            """Return {node name: open file count}, sampling nodes concurrently."""
            def count_open_files(target_node):
                return sum(1 for _ in self.redpanda.lsof_node(target_node))

            counts = {}
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=node_count) as executor:
                futs = {}
                for node in self.redpanda.nodes:
                    # Hand the node to the worker as an argument.  A lambda
                    # closing over the loop variable would read `node` when
                    # the worker runs, by which time the loop may already
                    # have moved on to a different node.
                    futs[node.name] = executor.submit(count_open_files, node)

                for node_name, fut in futs.items():
                    file_count = fut.result()
                    counts[node_name] = file_count

            return counts

        for node_name, file_count in get_fd_counts().items():
            self.logger.info(
                f"Open files before restarts on {node_name}: {file_count}")

        # Over large partition counts, the startup time is linear with the
        # amount of data we played in, because no one partition gets far
        # enough to snapshot.
        expect_start_time = expect_transmit_time

        # Measure the impact of restarts on resource utilization on an idle system:
        # at time of writing we know that the used FD count will go up substantially
        # on each restart (https://github.com/redpanda-data/redpanda/issues/4057)
        restart_count = 2

        self.logger.info("Entering restart stress test")
        for i in range(1, restart_count + 1):
            self.logger.info(f"Cluster restart {i}/{restart_count}...")

            # Normal restarts are rolling restarts, but because replay takes substantial time,
            # on an idle system it is helpful to do a concurrent global restart rather than
            # waiting for each node one by one.
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=node_count) as executor:
                futs = []
                for node in self.redpanda.nodes:
                    futs.append(
                        executor.submit(self.redpanda.restart_nodes,
                                        nodes=[node],
                                        start_timeout=expect_start_time))

                for f in futs:
                    # Raise on error
                    f.result()
            self.logger.info(
                f"Restart {i}/{restart_count} complete.  Waiting for elections..."
            )

            wait_until(
                lambda: self._all_elections_done(topic_names, n_partitions),
                timeout_sec=60,
                backoff_sec=5)
            self.logger.info("Post-restart elections done.")

            for node_name, file_count in get_fd_counts().items():
                self.logger.info(
                    f"Open files after {i} restarts on {node_name}: {file_count}"
                )

        # With increased overhead from all those segment rolls during restart,
        # check that consume still works.
        self._consume_all(topic_names, msg_count_per_topic,
                          expect_transmit_time)

        # Now that we've tested basic ability to form consensus and survive some
        # restarts, move on to a more general stress test.

        self.logger.info("Entering traffic stress test")
        target_topic = topic_names[0]

        stress_msg_size = 32768
        stress_data_size = 1024 * 1024 * 1024 * 100
        stress_msg_count = int(stress_data_size / stress_msg_size)
        fast_producer = FranzGoVerifiableProducer(
            self.test_context,
            self.redpanda,
            target_topic,
            stress_msg_size,
            stress_msg_count,
            custom_node=self.preallocated_nodes)
        fast_producer.start()

        # Don't start consumers until the producer has written out its first
        # checkpoint with valid ranges.
        wait_until(lambda: fast_producer.produce_status.acked > 0,
                   timeout_sec=30,
                   backoff_sec=1.0)

        rand_consumer = FranzGoVerifiableRandomConsumer(
            self.test_context,
            self.redpanda,
            target_topic,
            0,
            100,
            10,
            nodes=self.preallocated_nodes)
        rand_consumer.start(clean=False)
        rand_consumer.shutdown()
        rand_consumer.wait()

        fast_producer.wait()

        seq_consumer = FranzGoVerifiableSeqConsumer(self.test_context,
                                                    self.redpanda,
                                                    target_topic, 0,
                                                    self.preallocated_nodes)
        seq_consumer.start(clean=False)
        seq_consumer.shutdown()
        seq_consumer.wait()
        assert seq_consumer.consumer_status.invalid_reads == 0
        assert seq_consumer.consumer_status.valid_reads == stress_msg_count + msg_count_per_topic

        self.logger.info("Entering leader balancer stress test")

        # Enable the leader balancer and check that the system remains stable
        # under load.  We do not leave the leader balancer on for most of the test, because
        # it makes reads _much_ slower, because the consumer keeps stalling and waiting for
        # elections: at any moment in a 10k partition topic, it's highly likely at least
        # one partition is offline for a leadership migration.
        self.redpanda.set_cluster_config({'enable_leader_balancer': True},
                                         expect_restart=False)
        lb_stress_period = 120
        lb_stress_produce_bytes = expect_bandwidth * lb_stress_period
        lb_stress_message_count = int(lb_stress_produce_bytes /
                                      stress_msg_size)
        fast_producer = FranzGoVerifiableProducer(
            self.test_context,
            self.redpanda,
            target_topic,
            stress_msg_size,
            lb_stress_message_count,
            custom_node=self.preallocated_nodes)
        fast_producer.start()
        rand_consumer.start()
        time.sleep(lb_stress_period
                   )  # Let the system receive traffic for a set time period
        rand_consumer.shutdown()
        rand_consumer.wait()
        fast_producer.wait()
Beispiel #14
0
class RpkToolTest(RedpandaTest):
    """Tests for topic creation, producing and consuming through `rpk`.

    Every test talks to the cluster through one shared `RpkTool` and, where
    records are read back, checks what an `RpkConsumer` has collected in its
    `messages` list.
    """
    def __init__(self, ctx):
        super(RpkToolTest, self).__init__(test_context=ctx)
        # Kept because each RpkConsumer is constructed with the test context.
        self._ctx = ctx
        self._rpk = RpkTool(self.redpanda)

    @staticmethod
    def _record_matches(record, message, key, h_key, h_value):
        """Return True if `record` has the expected payload, key and exactly
        one header `h_key` -> `h_value`."""
        return record['message'] == message \
            and record['key'] == key \
            and record['headers'] == [{'key': h_key, 'value': h_value}]

    @staticmethod
    def _consumed_exactly(consumer, expected):
        """Return True once `consumer` holds one record per entry of
        `expected` (a key -> message dict) and every record agrees with it.

        A record whose key is None makes the check fail; a record whose key
        is not in `expected` raises KeyError.
        """
        if len(consumer.messages) != len(expected):
            return False
        return all(
            rec['key'] is not None and rec['message'] == expected[rec['key']]
            for rec in consumer.messages)

    def _produce_until_consumed(self, consumer, topic):
        """Produce one fixed record per poll until it is the first record
        `consumer` has received.

        Re-raises any error the consumer has recorded; otherwise polls
        through `wait_until` with a 30 second timeout.
        """
        message = 'message'
        key = 'key'
        h_key = 'h_key'
        h_value = 'h_value'
        headers = [h_key + ':' + h_value]

        def cond():
            if consumer.error:
                raise consumer.error
            # Produced on every poll rather than once up front; presumably a
            # record sent before the consumer is ready could be missed.
            self._rpk.produce(topic, key, message, headers)
            return bool(consumer.messages) and self._record_matches(
                consumer.messages[0], message, key, h_key, h_value)

        wait_until(cond,
                   timeout_sec=30,
                   backoff_sec=8,
                   err_msg="Message didn't appear.")

    def test_create_topic(self):
        """Create a topic and wait for it to show up in the topic list."""
        self._rpk.create_topic("topic")

        wait_until(lambda: "topic" in self._rpk.list_topics(),
                   timeout_sec=10,
                   backoff_sec=1,
                   err_msg="Topic never appeared.")

    def test_produce(self):
        """Produce a single record with one header and expect the consumer
        to end up with exactly that record."""
        topic = 'topic'
        message = 'message'
        key = 'key'
        h_key = 'h_key'
        h_value = 'h_value'
        headers = [h_key + ':' + h_value]

        self._rpk.create_topic(topic)
        self._rpk.produce(topic, key, message, headers)

        c = RpkConsumer(self._ctx, self.redpanda, topic)
        c.start()

        def cond():
            return len(c.messages) == 1 and self._record_matches(
                c.messages[0], message, key, h_key, h_value)

        wait_until(cond,
                   timeout_sec=30,
                   backoff_sec=2,
                   err_msg="Message didn't appear.")

    def test_consume_as_group(self):
        """Consume as a member of consumer group 'group'."""
        topic = 'topic_group'

        self._rpk.create_topic(topic)

        c = RpkConsumer(self._ctx, self.redpanda, topic, group='group')
        c.start()

        self._produce_until_consumed(c, topic)

    def test_consume_newest(self):
        """Consume starting from the 'newest' offset."""
        topic = 'topic_newest'

        self._rpk.create_topic(topic)
        # Gotta sleep to make sure the topic is replicated and the
        # consumer doesn't fail.
        time.sleep(5)

        c = RpkConsumer(self._ctx, self.redpanda, topic, offset='newest')
        c.start()

        self._produce_until_consumed(c, topic)

    def test_consume_oldest(self):
        """Produce a random number of records, then consume them all with a
        consumer that is started afterwards."""
        topic = 'topic'

        n = random.randint(10, 100)
        msgs = {'key-' + str(i): 'message-' + str(i) for i in range(n)}

        # Create the topic explicitly, like every other test in this class,
        # instead of producing to a topic that was never created.
        self._rpk.create_topic(topic)

        # Produce messages
        for k, v in msgs.items():
            self._rpk.produce(topic, k, v)

        c = RpkConsumer(self._ctx, self.redpanda, topic)
        c.start()

        # Consume from the beginning
        wait_until(lambda: self._consumed_exactly(c, msgs),
                   timeout_sec=30,
                   backoff_sec=8,
                   err_msg="Message didn't appear.")

    def test_consume_from_partition(self):
        """Produce to one randomly chosen partition and consume only that
        partition from the oldest offset."""
        topic = 'topic_partition'

        n_parts = random.randint(3, 100)
        self._rpk.create_topic(topic, partitions=n_parts)

        n = random.randint(10, 30)
        msgs = {'key-' + str(i): 'message-' + str(i) for i in range(n)}

        # randint() includes both endpoints and partition ids run from 0 to
        # n_parts - 1, so the upper bound must be n_parts - 1.
        part = random.randint(0, n_parts - 1)
        # Produce messages to a random partition
        for k, v in msgs.items():
            self._rpk.produce(topic, k, v, partition=part)

        # Consume from the beginning
        c = RpkConsumer(self._ctx,
                        self.redpanda,
                        topic,
                        offset='oldest',
                        partitions=[part])
        c.start()

        wait_until(lambda: self._consumed_exactly(c, msgs),
                   timeout_sec=10,
                   backoff_sec=1,
                   err_msg="Message didn't appear.")
    def test_recreated_topic_metadata_are_valid(self, replication_factor):
        """
        Check that all nodes report the same leader for every partition of a
        topic that has been deleted and created again.

        Before the topic is recreated, leadership of each partition is moved
        to another replica ten times, producing data after each round.
        """
        topic = 'tp-test'
        partition_count = 5
        rpk = RpkTool(self.redpanda)
        kcat = KafkaCat(self.redpanda)
        admin = Admin(self.redpanda)

        # the first incarnation uses the caller-supplied replication factor
        rpk.create_topic(topic=topic,
                         partitions=partition_count,
                         replicas=replication_factor)

        def leader_is(partition, expected_leader):
            # only the leader part of the kcat answer is of interest
            current, _ = kcat.get_partition_leader(topic, partition)
            return current == expected_leader

        def move_leaders_then_produce():
            for p in rpk.describe_topic(topic):
                # pick any replica other than the current leader
                others = set(p.replicas)
                others.remove(p.leader)
                target = random.choice(list(others))
                admin.partition_transfer_leadership("kafka", topic, p.id,
                                                    target)
                wait_until(lambda: leader_is(p.id, target),
                           timeout_sec=30,
                           backoff_sec=1)

            # produce some data to the topic once every leader has moved
            msg_cnt = 100
            producer = RpkProducer(self.test_context,
                                   self.redpanda,
                                   topic,
                                   16384,
                                   msg_cnt,
                                   acks=-1)
            producer.start()
            producer.wait()
            producer.free()

        # transfer leadership to grow the term
        for _ in range(10):
            move_leaders_then_produce()

        # recreate the topic
        # NOTE(review): the replica count here is fixed at 3 and ignores
        # `replication_factor` -- confirm whether that is intentional.
        rpk.delete_topic(topic)
        rpk.create_topic(topic=topic, partitions=partition_count, replicas=3)

        def metadata_consistent():
            # every node must name the same single leader for each partition
            for p in range(partition_count):
                leaders = set()
                for n in self.redpanda.nodes:
                    admin_partition = admin.get_partitions(topic=topic,
                                                           partition=p,
                                                           namespace="kafka",
                                                           node=n)
                    self.logger.info(
                        f"node: {n.account.hostname} partition: {admin_partition}"
                    )
                    leaders.add(admin_partition['leader_id'])

                self.logger.info(f"{topic}/{p} leaders: {leaders}")
                if len(leaders) != 1:
                    return False
            return True

        wait_until(metadata_consistent, timeout_sec=45, backoff_sec=2)
Beispiel #16
0
class RpkToolTest(RedpandaTest):
    """Covers `rpk` topic creation plus produce/consume round trips.

    Records are read back through an `RpkConsumer`, whose `messages` list
    holds dicts with 'value', 'key' and 'headers' entries.
    """
    def __init__(self, ctx):
        super().__init__(test_context=ctx)
        self._ctx = ctx
        self._rpk = RpkTool(self.redpanda)

    @cluster(num_nodes=3)
    def test_create_topic(self):
        """Create a topic and poll the topic list until it is present."""
        self._rpk.create_topic("topic")

        def topic_listed():
            return "topic" in self._rpk.list_topics()

        wait_until(topic_listed,
                   timeout_sec=10,
                   backoff_sec=1,
                   err_msg="Topic never appeared.")

    @cluster(num_nodes=4)
    def test_produce(self):
        """Produce one keyed record with a header and wait until the
        consumer holds exactly that record."""
        topic = 'topic'
        payload = 'message'
        record_key = 'key'
        header_key = 'h_key'
        header_value = 'h_value'
        wanted_headers = [{'key': header_key, 'value': header_value}]

        self._rpk.create_topic(topic)
        self._rpk.produce(topic, record_key, payload,
                          [f'{header_key}:{header_value}'])

        consumer = RpkConsumer(self._ctx, self.redpanda, topic)
        consumer.start()

        def single_record_arrived():
            received = consumer.messages
            if received is None or len(received) != 1:
                return False
            record = received[0]
            return (record['value'] == payload
                    and record['key'] == record_key
                    and record['headers'] == wanted_headers)

        wait_until(single_record_arrived,
                   timeout_sec=120,
                   backoff_sec=30,
                   err_msg="Message didn't appear.")

    @cluster(num_nodes=4)
    def test_consume_as_group(self):
        """Consume as part of group 'group', producing a record on every
        poll until it is the first one received."""
        topic = 'topic_group'
        payload = 'message'
        record_key = 'key'
        header_key = 'h_key'
        header_value = 'h_value'
        raw_headers = [f'{header_key}:{header_value}']
        wanted_headers = [{'key': header_key, 'value': header_value}]

        self._rpk.create_topic(topic)

        consumer = RpkConsumer(self._ctx,
                               self.redpanda,
                               topic,
                               group='group')
        consumer.start()

        def record_arrived():
            # surface a consumer failure instead of waiting for the timeout
            if consumer.error:
                raise consumer.error
            self._rpk.produce(topic, record_key, payload, raw_headers)
            received = consumer.messages
            if not received:
                return False
            first = received[0]
            return (first['value'] == payload and first['key'] == record_key
                    and first['headers'] == wanted_headers)

        wait_until(record_arrived,
                   timeout_sec=120,
                   backoff_sec=15,
                   err_msg="Message didn't appear.")

    @cluster(num_nodes=4)
    def test_consume_newest(self):
        """Consume from the 'newest' offset, producing a record on every
        poll until it is the first one received."""
        topic = 'topic_newest'
        payload = 'newest message'
        record_key = 'key'
        header_key = 'h_key'
        header_value = 'h_value'
        raw_headers = [f'{header_key}:{header_value}']
        wanted_headers = [{'key': header_key, 'value': header_value}]

        self._rpk.create_topic(topic)

        consumer = RpkConsumer(self._ctx,
                               self.redpanda,
                               topic,
                               offset='newest')
        consumer.start()

        def record_arrived():
            # surface a consumer failure instead of waiting for the timeout
            if consumer.error:
                raise consumer.error
            self._rpk.produce(topic, record_key, payload, raw_headers)
            received = consumer.messages
            if not received:
                return False
            first = received[0]
            return (first['value'] == payload and first['key'] == record_key
                    and first['headers'] == wanted_headers)

        wait_until(record_arrived,
                   timeout_sec=150,
                   backoff_sec=30,
                   err_msg="Message didn't appear.")

    @cluster(num_nodes=4)
    def test_consume_oldest(self):
        """Produce a random number of records, then start a consumer and
        wait until it has received every one of them."""
        topic = 'topic'

        count = random.randint(10, 100)
        expected = {f'key-{i}': f'message-{i}' for i in range(count)}

        self._rpk.create_topic(topic)

        # Produce messages
        for record_key, payload in expected.items():
            self._rpk.produce(topic, record_key, payload)

        consumer = RpkConsumer(self._ctx, self.redpanda, topic)
        consumer.start()

        def everything_consumed():
            # Consume from the beginning
            if len(consumer.messages) != len(expected):
                return False
            return all(
                rec['key'] is not None and rec['value'] == expected[rec['key']]
                for rec in consumer.messages)

        wait_until(everything_consumed,
                   timeout_sec=60,
                   backoff_sec=20,
                   err_msg="Message didn't appear.")

    @cluster(num_nodes=4)
    def test_consume_from_partition(self):
        """Produce to one random partition and consume only that partition
        from the oldest offset, tolerating slow but steady delivery."""
        topic = 'topic_partition'

        partition_total = random.randint(3, 100)
        self._rpk.create_topic(topic, partitions=partition_total)

        count = random.randint(10, 30)
        expected = {f'key-{i}': f'message-{i}' for i in range(count)}

        # Produce messages to a random partition
        chosen = random.randint(0, partition_total - 1)
        for record_key, payload in expected.items():
            self._rpk.produce(topic, record_key, payload, partition=chosen)

        # Consume from the beginning
        consumer = RpkConsumer(self._ctx,
                               self.redpanda,
                               topic,
                               offset='oldest',
                               partitions=[chosen])
        consumer.start()

        def everything_consumed():
            if len(consumer.messages) != len(expected):
                return False
            return all(
                rec['key'] is not None and rec['value'] == expected[rec['key']]
                for rec in consumer.messages)

        # timeout loop, but reset the timeout if we appear to be making progress
        idle_budget = 10
        attempts_left = idle_budget
        high_water = len(consumer.messages)
        while attempts_left > 0:
            self.redpanda.logger.debug(
                f"Message count {len(consumer.messages)} retries {attempts_left}"
            )
            if everything_consumed():
                return
            if len(consumer.messages) > high_water:
                # new records arrived since the last poll: start over
                high_water = len(consumer.messages)
                attempts_left = idle_budget
            time.sleep(1)
            attempts_left -= 1

        raise ducktape.errors.TimeoutError("Message didn't appear")
Beispiel #17
0
    def test_deletion_stops_move(self):
        """
        Delete a topic whose partition is being moved and check the status
        after the topic is created again; the old move operation should not
        influence the newly created topic.
        """
        self.start_redpanda(num_nodes=3)

        topic = 'test-topic'
        partition = 0
        num_records = 1000

        # create a single topic with replication factor of 1
        rpk = RpkTool(self.redpanda)
        rpk.create_topic(topic, 1, 1)

        self.logger.info(f"Producing to {topic}")
        producer = KafProducer(self.test_context, self.redpanda, topic,
                               num_records)
        producer.start()
        self.logger.info(
            f"Finished producing to {topic}, waiting for producer...")
        producer.wait()
        producer.free()
        self.logger.info("Producer stop complete.")

        admin = Admin(self.redpanda)

        # the partition currently lives on exactly one node
        assignments = self._get_assignments(admin, topic, partition)
        assert len(assignments) == 1
        self.logger.info(f"assignments for {topic}-{partition}: {assignments}")

        brokers = admin.get_brokers()
        self.logger.info(f"available brokers: {brokers}")

        # move the partition to any broker other than the current one
        current_node_id = assignments[0]['node_id']
        candidates = [b for b in brokers if b['node_id'] != current_node_id]
        replacement = random.choice(candidates)
        target_assignment = [{'node_id': replacement['node_id'], 'core': 0}]
        self.logger.info(
            f"target assignments for {topic}-{partition}: {target_assignment}")

        # shutdown target node to make sure that move will never complete
        node = self.redpanda.get_node(replacement['node_id'])
        self.redpanda.stop_node(node)
        admin.set_partition_replicas(topic, partition, target_assignment)

        def get_status():
            partition_info = admin.get_partitions(topic, partition)
            self.logger.info(
                f"current assignments for {topic}-{partition}: {partition_info}"
            )
            return partition_info["status"]

        def status_is(expected):
            return get_status() == expected

        # check that the status is in progress
        wait_until(lambda: status_is('in_progress'),
                   timeout_sec=10,
                   backoff_sec=1)

        # delete the topic while the move is still pending, then bring the
        # stopped node back and create the topic again
        rpk.delete_topic(topic)
        self.redpanda.start_node(node)
        rpk.create_topic(topic, 1, 1)

        wait_until(lambda: status_is('done'), timeout_sec=10, backoff_sec=1)