def test_produce(self):
    """Produce a single record with a header and verify it round-trips
    through a consumer (key, value and headers all intact)."""
    topic = 'topic'
    message = 'message'
    key = 'key'
    h_key = 'h_key'
    h_value = 'h_value'
    headers = [h_key + ':' + h_value]

    self._rpk.create_topic(topic)
    self._rpk.produce(topic, key, message, headers)

    consumer = RpkConsumer(self._ctx, self.redpanda, topic)
    consumer.start()

    def message_arrived():
        msgs = consumer.messages
        if msgs is None or len(msgs) != 1:
            return False
        first = msgs[0]
        return (first['value'] == message and first['key'] == key
                and first['headers'] == [{
                    'key': h_key,
                    'value': h_value
                }])

    wait_until(message_arrived,
               timeout_sec=120,
               backoff_sec=30,
               err_msg="Message didn't appear.")
def test_consume_newest(self):
    """Consume from the 'newest' offset: a record produced after the
    consumer attaches must be delivered to it."""
    topic = 'topic_newest'
    message = 'newest message'
    key = 'key'
    h_key = 'h_key'
    h_value = 'h_value'
    headers = [h_key + ':' + h_value]

    self._rpk.create_topic(topic)

    consumer = RpkConsumer(self._ctx, self.redpanda, topic, offset='newest')
    consumer.start()

    def saw_message():
        if consumer.error:
            raise consumer.error

        # Re-produce on every poll: with offset='newest' the consumer only
        # sees records written after it attached, so keep feeding it until
        # one lands.
        self._rpk.produce(topic, key, message, headers)

        msgs = consumer.messages
        if not msgs:
            return False
        first = msgs[0]
        return (first['value'] == message and first['key'] == key
                and first['headers'] == [{
                    'key': h_key,
                    'value': h_value
                }])

    wait_until(saw_message,
               timeout_sec=150,
               backoff_sec=30,
               err_msg="Message didn't appear.")
def _consume_and_count_bytes(self):
    """Consume from self.topic and accumulate the byte length of each
    message value into self._bytes_received; returns once any bytes
    have been fetched."""
    consumer = RpkConsumer(self._ctx, self.redpanda, self.topic)
    consumer.start()
    self._bytes_received = 0

    def received_some_bytes():
        for record in consumer.messages:
            payload = record["value"]
            # Ignore None values
            if payload is None:
                return False
            self._bytes_received += len(payload)
        # Succeeds the first time any bytes at all have been fetched.
        return self._bytes_received > 0

    wait_until(received_some_bytes,
               timeout_sec=30,
               backoff_sec=5,
               err_msg="count_bytes() failed")
    consumer.stop()
def _consume_all(self, topic_names: list[str], msg_count_per_topic: int,
                 timeout_per_topic: int):
    """
    Consume every message from each topic, discarding them as they
    arrive, to demonstrate that doing so does not exhaust redpanda
    resources.
    """

    def has_consumed_enough(consumer):
        self.logger.info(
            f"Consumer message_count={consumer.message_count} / {msg_count_per_topic}"
        )
        # Tolerate greater-than, because if there were errors during production
        # there can have been retries.
        return consumer.message_count >= msg_count_per_topic

    for name in topic_names:
        consumer = RpkConsumer(self._ctx,
                               self.redpanda,
                               name,
                               save_msgs=False,
                               fetch_max_bytes=BIG_FETCH,
                               num_msgs=msg_count_per_topic)
        consumer.start()
        wait_until(lambda: has_consumed_enough(consumer),
                   timeout_sec=timeout_per_topic,
                   backoff_sec=5)
        consumer.stop()
        consumer.free()
def test_consume_oldest(self):
    """Produce a random batch of keyed records and verify a consumer
    reading from the beginning receives every one of them intact."""
    topic = 'topic'

    # Random batch size, keyed 'key-i' -> 'message-i'.
    n = random.randint(10, 100)
    msgs = {}
    for i in range(n):
        msgs['key-' + str(i)] = 'message-' + str(i)

    # Produce messages
    for k in msgs:
        self._rpk.produce(topic, k, msgs[k])

    c = RpkConsumer(self._ctx, self.redpanda, topic)
    c.start()

    def cond():
        # Consume from the beginning
        if len(c.messages) != len(msgs):
            return False

        for m in c.messages:
            key = m['key']
            if key is None:
                return False
            # BUG FIX: consumed records expose the payload under 'value'
            # (as the sibling rpk consumer tests assert); 'message' is not
            # a key of the record dict, so the original comparison could
            # never validate the payload.
            if m['value'] != msgs[key]:
                return False

        return True

    wait_until(cond,
               timeout_sec=30,
               backoff_sec=8,
               err_msg="Message didn't appear.")
def test_kafka_streams(self):
    """Run a kafka-streams example end to end: produce input, run the
    example app, and wait until the consumer sees a valid output record."""
    example = self.create_example()

    # This will raise TypeError if PRODUCER is undefined
    producer = self.PRODUCER(self._ctx, self.redpanda, self.topics[0].name)
    consumer = RpkConsumer(self._ctx, self.redpanda, self.topics[1].name)

    # Start the example
    example.start()

    # Produce some data
    producer.start()
    producer.wait()

    # Consume the data
    consumer.start()

    def output_is_valid():
        # True as soon as any consumed record passes validation.
        return any(self.is_valid_msg(m) for m in consumer.messages)

    wait_until(
        output_is_valid,
        timeout_sec=self._timeout,
        backoff_sec=5,
        err_msg=f"kafka-streams {self._ctx.cls_name} consumer failed")

    consumer.stop()
    producer.stop()
    example.stop()
def test_many_clients(self):
    """
    Check that redpanda remains stable under higher numbers of clients
    than usual.
    """
    # This test requires dedicated system resources to run reliably.
    assert self.redpanda.dedicated_nodes

    # Scale tests are not run on debug builds
    assert not self.debug_mode

    PARTITION_COUNT = 100
    PRODUCER_COUNT = 4000
    TOPIC_NAME = "manyclients"
    RECORDS_PER_PRODUCER = 1000

    self.client().create_topic(
        TopicSpec(
            name=TOPIC_NAME,
            partition_count=PARTITION_COUNT,
            retention_bytes=10 * 1024 * 1024,
            segment_bytes=1024 * 1024 * 5,
        ))

    # Two consumers sharing a group: enough to at least touch consumer
    # group functionality without stressing overall consumer counts.
    consumer_a, consumer_b = (RpkConsumer(self.test_context,
                                          self.redpanda,
                                          TOPIC_NAME,
                                          group="testgroup",
                                          save_msgs=False) for _ in range(2))

    producer = ProducerSwarm(self.test_context, self.redpanda, TOPIC_NAME,
                             PRODUCER_COUNT, RECORDS_PER_PRODUCER)

    producer.start()
    consumer_a.start()
    consumer_b.start()
    producer.wait()

    def complete():
        expect = PRODUCER_COUNT * RECORDS_PER_PRODUCER
        self.logger.info(
            f"Message counts: {consumer_a.message_count} {consumer_b.message_count} (vs {expect})"
        )
        return consumer_a.message_count + consumer_b.message_count >= expect

    wait_until(complete,
               timeout_sec=30,
               backoff_sec=1,
               err_msg="Consumers didn't see all messages")
def test_consume_from_partition(self):
    """Produce keyed records to one randomly chosen partition and verify
    that a consumer scoped to just that partition receives all of them,
    resetting the timeout while consumption is still making progress."""
    topic = 'topic_partition'

    n_parts = random.randint(3, 100)
    self._rpk.create_topic(topic, partitions=n_parts)

    total = random.randint(10, 30)
    expected = {'key-' + str(i): 'message-' + str(i) for i in range(total)}

    part = random.randint(0, n_parts - 1)

    # Produce messages to a random partition
    for key, value in expected.items():
        self._rpk.produce(topic, key, value, partition=part)

    # Consume from the beginning
    consumer = RpkConsumer(self._ctx,
                           self.redpanda,
                           topic,
                           offset='oldest',
                           partitions=[part])
    consumer.start()

    def all_received():
        if len(consumer.messages) != len(expected):
            return False
        for record in consumer.messages:
            key = record['key']
            if key is None:
                return False
            if record['value'] != expected[key]:
                return False
        return True

    # Timeout loop, but reset the countdown whenever we appear to be
    # making progress.
    retries = 10
    prev_msg_count = len(consumer.messages)
    while retries > 0:
        self.redpanda.logger.debug(
            f"Message count {len(consumer.messages)} retries {retries}")
        if all_received():
            return
        if len(consumer.messages) > prev_msg_count:
            prev_msg_count = len(consumer.messages)
            retries = 10
        time.sleep(1)
        retries -= 1

    raise ducktape.errors.TimeoutError("Message didn't appear")
def test_consume_from_partition(self):
    """Produce keyed records to one randomly chosen partition and verify
    that a consumer scoped to just that partition receives all of them."""
    topic = 'topic_partition'

    n_parts = random.randint(3, 100)
    self._rpk.create_topic(topic, partitions=n_parts)

    n = random.randint(10, 30)
    msgs = {}
    for i in range(n):
        msgs['key-' + str(i)] = 'message-' + str(i)

    # BUG FIX: random.randint is inclusive on both ends, so the upper
    # bound must be n_parts - 1; randint(0, n_parts) could select a
    # partition that does not exist.
    part = random.randint(0, n_parts - 1)

    # Produce messages to a random partition
    for k in msgs:
        self._rpk.produce(topic, k, msgs[k], partition=part)

    # Consume from the beginning
    c = RpkConsumer(self._ctx,
                    self.redpanda,
                    topic,
                    offset='oldest',
                    partitions=[part])
    c.start()

    def cond():
        if len(c.messages) != len(msgs):
            return False

        for m in c.messages:
            key = m['key']
            if key is None:
                return False
            # BUG FIX: the record payload is exposed under 'value', not
            # 'message' (consistent with the sibling rpk consumer tests).
            if m['value'] != msgs[key]:
                return False

        return True

    wait_until(cond,
               timeout_sec=10,
               backoff_sec=1,
               err_msg="Message didn't appear.")
def test_leadership_transfer(self):
    """
    Repeatedly transfer leadership of the internal consumer group
    partition (kafka_internal/group/0) between nodes and verify that
    group offset metrics are reported only by the current leader.
    """
    topics = list(filter(lambda x: x.partition_count > 1, self.topics))
    group = "g0"

    # Background producers and group consumers give the group offsets
    # for the leader to report.
    producers = []
    for topic in topics:
        producer = RpkProducer(self._ctx,
                               self.redpanda,
                               topic.name,
                               msg_size=5,
                               msg_count=1000)
        producer.start()
        producers.append(producer)

    consumers = []
    for topic in topics:
        consumer = RpkConsumer(self._ctx,
                               self.redpanda,
                               topic.name,
                               group=group)
        consumer.start()
        consumers.append(consumer)

    # Wait until cluster starts producing metrics
    wait_until(
        lambda: self.redpanda.metrics_sample("kafka_group_offset") is not None,
        timeout_sec=30,
        backoff_sec=5)

    admin = Admin(redpanda=self.redpanda)

    def get_offset_with_node_from_metric(group):
        # Offset metric samples for the given group, or None when the
        # metric is not (yet) exposed.
        metric = self.redpanda.metrics_sample("kafka_group_offset")
        if metric is None:
            return None
        metric = metric.label_filter(dict(group=group))
        return metric.samples

    def get_group_leader():
        # Leader node id of the internal group partition.
        return admin.get_partitions(namespace="kafka_internal",
                                    topic="group",
                                    partition=0)['leader_id']

    def check_metric_from_node(node):
        # True iff every group offset sample originates from `node`.
        metrics_offsets = get_offset_with_node_from_metric(group)
        if metrics_offsets is None:
            return False
        return all([
            metric.node.account.hostname == node.account.hostname
            for metric in metrics_offsets
        ])

    def transfer_completed(new_leader_node):
        # Leader ids are 1-based indices into self.redpanda.nodes here —
        # map back to a node and compare hostnames.
        return self.redpanda.nodes[get_group_leader() - 1].account.hostname \
            == new_leader_node.account.hostname

    leader_node = self.redpanda.nodes[get_group_leader() - 1]
    check_metric_from_node(leader_node)

    # Check transfer leadership to another node
    for _ in range(3):
        new_leader_node = random.choice(
            list(filter(lambda x: x != leader_node, self.redpanda.nodes)))

        admin.transfer_leadership_to(namespace="kafka_internal",
                                     topic="group",
                                     partition=0,
                                     target=self.redpanda.idx(new_leader_node))

        wait_until(lambda: transfer_completed(new_leader_node) and
                   check_metric_from_node(new_leader_node),
                   timeout_sec=30,
                   backoff_sec=5)

        leader_node = new_leader_node

    # Check transfer leadership to same node
    admin.transfer_leadership_to(namespace="kafka_internal",
                                 topic="group",
                                 partition=0,
                                 target=self.redpanda.idx(leader_node))

    wait_until(lambda: transfer_completed(leader_node) and
               check_metric_from_node(leader_node),
               timeout_sec=30,
               backoff_sec=5)

    for host in producers + consumers:
        host.stop()
        host.free()
def test_leadership_transfer(self):
    """
    Repeatedly move leadership of the __consumer_offsets partition
    between nodes and verify that consumer group offset metrics are
    reported only by the current leader after each transfer.
    """
    topics = list(filter(lambda x: x.partition_count > 1, self.topics))
    group = "g0"

    # Background producers and group consumers give the group offsets
    # for the leader to report.
    producers = []
    for topic in topics:
        producer = RpkProducer(self._ctx,
                               self.redpanda,
                               topic.name,
                               msg_size=5,
                               msg_count=1000)
        producer.start()
        producers.append(producer)

    consumers = []
    for topic in topics:
        consumer = RpkConsumer(self._ctx,
                               self.redpanda,
                               topic.name,
                               group=group)
        consumer.start()
        consumers.append(consumer)

    # Wait until cluster starts producing metrics
    wait_until(
        lambda: self.redpanda.metrics_sample("kafka_group_offset") is not None,
        timeout_sec=30,
        backoff_sec=5)

    admin = Admin(redpanda=self.redpanda)

    def get_group_partition():
        # Partition metadata of the internal consumer offsets partition.
        return admin.get_partitions(namespace="kafka",
                                    topic="__consumer_offsets",
                                    partition=0)

    def get_group_leader():
        return get_group_partition()['leader_id']

    def metrics_from_single_node(node):
        """
        Check that metrics are produced only by the given node.
        """
        metrics = self.redpanda.metrics_sample("kafka_group_offset")
        if not metrics:
            self.logger.debug("No metrics found")
            return False
        metrics = metrics.label_filter(dict(group=group)).samples
        for metric in metrics:
            self.logger.debug(
                f"Retrieved metric from node={metric.node.account.hostname}: {metric}"
            )
        return all([
            metric.node.account.hostname == node.account.hostname
            for metric in metrics
        ])

    def transfer_leadership(new_leader):
        """
        Request leadership transfer of the internal consumer group
        partition and check that it completes successfully.
        """
        self.logger.debug(
            f"Transferring leadership to {new_leader.account.hostname}")
        admin.transfer_leadership_to(namespace="kafka",
                                     topic="__consumer_offsets",
                                     partition=0,
                                     target=self.redpanda.idx(new_leader))
        for _ in range(3):  # re-check a few times
            leader = get_group_leader()
            self.logger.debug(f"Current leader: {leader}")
            if leader != -1 and self.redpanda.get_node(leader) == new_leader:
                return True
            time.sleep(1)
        return False

    def partition_ready():
        """
        All replicas present and known leader
        """
        partition = get_group_partition()
        # BUG FIX: replaced the leftover "XXXXX" debug marker with a
        # meaningful log message.
        self.logger.debug(f"Group partition status: {partition}")
        return len(
            partition['replicas']) == 3 and partition['leader_id'] >= 0

    def select_next_leader():
        """
        Select a leader different than the current leader
        """
        wait_until(partition_ready, timeout_sec=30, backoff_sec=5)
        partition = get_group_partition()
        replicas = partition['replicas']
        assert len(replicas) == 3
        leader = partition['leader_id']
        assert leader >= 0
        replicas = filter(lambda r: r["node_id"] != leader, replicas)
        new_leader = random.choice(list(replicas))['node_id']
        return self.redpanda.get_node(new_leader)

    # repeat the following test a few times.
    #
    # 1. transfer leadership to a new node
    # 2. check that new leader reports metrics
    # 3. check that prev leader does not report
    #
    # note that here reporting does not mean that the node does not report
    # any metrics but that it does not report metrics for consumer groups
    # for which it is not leader.
    for _ in range(4):
        new_leader = select_next_leader()

        wait_until(lambda: transfer_leadership(new_leader),
                   timeout_sec=30,
                   backoff_sec=5)

        wait_until(lambda: metrics_from_single_node(new_leader),
                   timeout_sec=30,
                   backoff_sec=5)

    for host in producers + consumers:
        host.stop()
        host.free()
def test_consumer_group_mirroring(self, source_type):
    """Mirror a consumer group with MirrorMaker2 and wait until the
    target cluster's view of the group matches the source cluster's."""
    # start redpanda
    self.start_brokers(source_type=source_type)
    consumer_group = "test-group-1"

    # start mirror maker
    self.mirror_maker = MirrorMaker2(self.test_context,
                                     num_nodes=1,
                                     source_cluster=self.source_broker,
                                     target_cluster=self.redpanda,
                                     consumer_group_pattern=consumer_group,
                                     log_level="TRACE")
    self.mirror_maker.start()

    msg_size = 512
    msg_cnt = 1000000 if self.redpanda.dedicated_nodes else 100

    # produce some messages to source redpanda
    producer = RpkProducer(self.test_context,
                           self.source_broker,
                           self.topic.name,
                           msg_size,
                           msg_cnt,
                           acks=-1)
    producer.start()
    producer.wait()
    producer.free()

    # consume some messages from source redpanda so the group commits
    # offsets that mirror maker can replicate
    consumer = RpkConsumer(self.test_context,
                           self.source_broker,
                           self.topic.name,
                           ignore_errors=False,
                           retries=3,
                           group=consumer_group,
                           save_msgs=False,
                           num_msgs=int(msg_cnt / 5))
    consumer.start()
    consumer.wait()
    consumer.stop()
    source_messages = consumer.messages
    self.logger.info(f"source message count: {len(source_messages)}")
    consumer.free()

    src_rpk = RpkTool(self.source_broker)
    source_group = src_rpk.group_describe(consumer_group)
    target_rpk = RpkTool(self.redpanda)

    def target_group_equal():
        try:
            target_group = target_rpk.group_describe(consumer_group)
        except RpkException as e:
            # e.g. COORDINATOR_NOT_AVAILABLE
            self.logger.info(f"Error describing target cluster group: {e}")
            return False

        self.logger.info(
            f"source {source_group}, target_group: {target_group}")
        groups_match = (target_group.partitions == source_group.partitions
                        and target_group.name == source_group.name)
        return groups_match

    # wait for consumer group sync
    timeout = 600 if self.redpanda.dedicated_nodes else 60
    wait_until(target_group_equal, timeout_sec=timeout, backoff_sec=5)

    self.mirror_maker.stop()
def test_static(self):
    """
    Move partitions with data, but no active producers or consumers.
    """
    self.logger.info(f"Starting redpanda...")
    self.start_redpanda(num_nodes=3)

    # Build a mix of topic shapes: partition counts 1..4 crossed with
    # replication factors 1 and 3 (8 topics total).
    topics = []
    for partition_count in range(1, 5):
        for replication_factor in (1, 3):
            name = f"topic{len(topics)}"
            spec = TopicSpec(name=name,
                             partition_count=partition_count,
                             replication_factor=replication_factor)
            topics.append(spec)

    self.logger.info(f"Creating topics...")
    for spec in topics:
        self.client().create_topic(spec)

    num_records = 1000
    # Expected (key, value) pairs; every topic receives the same records.
    produced = set(
        ((f"key-{i:08d}", f"record-{i:08d}") for i in range(num_records)))

    for spec in topics:
        self.logger.info(f"Producing to {spec}")
        producer = KafProducer(self.test_context, self.redpanda, spec.name,
                               num_records)
        producer.start()
        self.logger.info(
            f"Finished producing to {spec}, waiting for producer...")
        producer.wait()
        producer.free()
        self.logger.info(f"Producer stop complete.")

    # Exercise partition movement repeatedly before verifying data.
    # NOTE(review): _move_and_verify is defined elsewhere in this class.
    for _ in range(25):
        self._move_and_verify()

    # Re-consume each topic and check nothing was lost or corrupted by
    # the moves.
    for spec in topics:
        self.logger.info(f"Verifying records in {spec}")

        consumer = RpkConsumer(self.test_context,
                               self.redpanda,
                               spec.name,
                               ignore_errors=False,
                               retries=0)
        consumer.start()
        timeout = 30
        t1 = time.time()
        consumed = set()

        # Poll until the consumed set matches what was produced, or the
        # deadline passes — in which case the diff is logged and the
        # assert below fails the test.
        while consumed != produced:
            if time.time() > t1 + timeout:
                self.logger.error(
                    f"Validation failed for topic {spec.name}. Produced {len(produced)}, consumed {len(consumed)}"
                )
                self.logger.error(
                    f"Messages consumed but not produced: {sorted(consumed - produced)}"
                )
                self.logger.error(
                    f"Messages produced but not consumed: {sorted(produced - consumed)}"
                )
                assert set(consumed) == produced
            else:
                time.sleep(5)

            # Rebuild the consumed set from everything fetched so far.
            for m in consumer.messages:
                self.logger.info(f"message: {m}")
            consumed = set([(m['key'], m['value'])
                            for m in consumer.messages])

        self.logger.info(f"Stopping consumer...")
        consumer.stop()
        self.logger.info(f"Awaiting consumer...")
        consumer.wait()
        self.logger.info(f"Freeing consumer...")
        consumer.free()

        self.logger.info(f"Finished verifying records in {spec}")