def stress_test(self):
    """
    Repeatedly create a compacted topic, produce to it until segments are
    compacted on every node, then delete the topic and verify that its
    on-disk storage is removed.
    """
    for i in range(10):
        spec = TopicSpec(partition_count=2,
                         cleanup_policy=TopicSpec.CLEANUP_COMPACT)
        topic_name = spec.name
        self.client().create_topic(spec)

        producer = RpkProducer(self.test_context, self.redpanda, topic_name,
                               1024, 100000)
        producer.start()

        # Track the compacted-segment counter on every node so we can tell
        # when compaction has actually run.
        metrics = [
            MetricCheck(self.logger, self.redpanda, n,
                        'vectorized_storage_log_compacted_segment_total', {},
                        sum) for n in self.redpanda.nodes
        ]

        def check_compaction():
            return all([
                m.evaluate([
                    ('vectorized_storage_log_compacted_segment_total',
                     lambda a, b: b > 3)
                ]) for m in metrics
            ])

        wait_until(check_compaction,
                   timeout_sec=120,
                   backoff_sec=5,
                   err_msg="Segments were not compacted")

        self.client().delete_topic(topic_name)

        try:
            producer.stop()
        except Exception:
            # Ignore exceptions from rpk: the topic was deleted out from
            # under the producer, so errors on stop are expected.
            pass
        producer.free()

        def topic_storage_purged():
            storage = self.redpanda.storage()
            return all(
                map(lambda n: topic_name not in n.ns["kafka"].topics,
                    storage.nodes))

        try:
            wait_until(topic_storage_purged,
                       timeout_sec=60,
                       backoff_sec=2,
                       err_msg="Topic storage was not removed")
        except Exception:
            # On errors, dump a listing of the storage location to aid debugging
            for node in self.redpanda.nodes:
                self.logger.error(f"Storage listing on {node.name}:")
                for line in node.account.ssh_capture(
                        f"find {self.redpanda.DATA_DIR}"):
                    self.logger.error(line.strip())
            raise
def test_leader_transfers_recovery(self, acks):
    """
    Validate that leadership transfers complete successfully under writes
    that prompt the leader to frequently activate recovery_stm.

    When acks=1, this is a reproducer for
    https://github.com/vectorizedio/redpanda/issues/2580

    When acks=-1, this is a reproducer for
    https://github.com/vectorizedio/redpanda/issues/2606
    """
    leader_node_id, replicas = self._wait_for_leader()

    if acks == -1:
        producer = RpkProducer(self._ctx,
                               self.redpanda,
                               self.topic,
                               16384,
                               sys.maxsize,
                               acks=acks)
    else:
        # To reproduce the acks=1 issue, we need an intermittent producer
        # that waits long enough between messages to let recovery_stm go
        # to sleep waiting for follower_state_change.
        #
        # KafProducer is intermittent because it starts a fresh process for
        # each message, whereas RpkProducer writes a continuous stream.
        #
        # TODO: create a test traffic generator that has inter-message
        # delay as an explicit parameter, rather than relying on
        # implementation details of the producer helpers.
        producer = KafProducer(self._ctx, self.redpanda, self.topic)

    producer.start()

    # Pass leadership around in a ring
    self.logger.info(f"Initial leader of {self.topic} is {leader_node_id}")

    transfer_count = 50
    # FIXME: with a transfer count >100, we tend to see reactor stalls and
    # corresponding nondeterministic behaviour/failures.  This appears
    # unrelated to the functionality under test: something else is tripping
    # up the cluster when we do so many leadership transfers.
    # https://github.com/vectorizedio/redpanda/issues/2623

    admin = Admin(self.redpanda)

    initial_leader_id = leader_node_id
    for n in range(0, transfer_count):
        target_idx = (initial_leader_id + n) % len(self.redpanda.nodes)
        target_node_id = target_idx + 1

        self.logger.info(f"Starting transfer to {target_node_id}")
        admin.partition_transfer_leadership("kafka", self.topic, 0,
                                            target_node_id)

        self._wait_for_leader(
            lambda l: l is not None and l == target_node_id,
            timeout=ELECTION_TIMEOUT * 2)
        self.logger.info(f"Completed transfer to {target_node_id}")

    self.logger.info(f"Completed {transfer_count} transfers successfully")

    # Stop the producer explicitly so that we surface any errors it hit
    producer.stop()
    producer.wait()
    producer.free()