def test_delete_topic_from_ongoing_tx(self):
    tx_id = "0"
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        for partition in range(topic.partition_count):
            producer.produce(topic.name, '0', '0', partition)

    producer.flush()

    txs_info = self.admin.get_all_transactions()
    assert len(txs_info) == 1, "Expected exactly one ongoing transaction"

    rpk = RpkTool(self.redpanda)
    topic_name = self.topics[0].name
    rpk.delete_topic(topic_name)

    tx = txs_info[0]
    assert tx["transactional_id"] == tx_id, \
        f"Expected transactional_id: {tx_id}, but got {tx['transactional_id']}"

    # Remove the deleted topic's partitions from the open transaction so the
    # commit does not wait on partitions that no longer exist.
    for partition in tx["partitions"]:
        assert partition["ns"] == "kafka"
        if partition["topic"] == topic_name:
            self.admin.delete_partition_from_transaction(
                tx["transactional_id"], partition["ns"], partition["topic"],
                partition["partition_id"], partition["etag"])

    producer.commit_transaction()

    # A fresh transaction that skips the deleted topic should succeed.
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        if topic.name != topic_name:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

    producer.commit_transaction()
def test_delete_topic_from_prepared_tx(self):
    tx_id = "0"
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        for partition in range(topic.partition_count):
            producer.produce(topic.name, '0', '0', partition)

    producer.flush()

    rpk = RpkTool(self.redpanda)
    topic_name = self.topics[0].name
    rpk.delete_topic(topic_name)

    # Committing a transaction that still references the deleted topic
    # must fail.
    try:
        producer.commit_transaction()
        raise Exception("commit_transaction should fail")
    except ck.cimpl.KafkaException as e:
        kafka_error = e.args[0]
        assert kafka_error.code() == ck.cimpl.KafkaError.UNKNOWN

    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })

    try:
        producer.init_transactions()
        raise Exception("init_transactions should fail")
    except ck.cimpl.KafkaException as e:
        kafka_error = e.args[0]
        assert kafka_error.code() == ck.cimpl.KafkaError.BROKER_NOT_AVAILABLE

    txs_info = self.admin.get_all_transactions()
    assert len(txs_info) == 1, "Expected exactly one transaction at this point"

    # Remove the deleted topic's partitions from the stuck transaction.
    tx = txs_info[0]
    for partition in tx["partitions"]:
        assert partition["ns"] == "kafka"
        if partition["topic"] == topic_name:
            self.admin.delete_partition_from_transaction(
                tx["transactional_id"], partition["ns"], partition["topic"],
                partition["partition_id"], partition["etag"])

    # After the stale partitions are removed, a fresh transaction that
    # skips the deleted topic should succeed.
    producer = ck.Producer({
        'bootstrap.servers': self.redpanda.brokers(),
        'transactional.id': tx_id,
    })
    producer.init_transactions()
    producer.begin_transaction()

    for topic in self.topics:
        if topic.name != topic_name:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

    producer.commit_transaction()
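# Both tests above repeat the same loop that strips a deleted topic's
# partitions out of an open transaction. A minimal sketch of a shared helper
# is shown here; it reuses only the get_all_transactions and
# delete_partition_from_transaction admin calls already used in the tests,
# and the helper name _remove_topic_from_tx is hypothetical.
def _remove_topic_from_tx(self, tx_id, topic_name):
    # Hypothetical helper, not part of the original tests: drop the deleted
    # topic's partitions from the transaction identified by tx_id.
    txs_info = self.admin.get_all_transactions()
    tx = next(t for t in txs_info if t["transactional_id"] == tx_id)
    for partition in tx["partitions"]:
        assert partition["ns"] == "kafka"
        if partition["topic"] == topic_name:
            self.admin.delete_partition_from_transaction(
                tx["transactional_id"], partition["ns"], partition["topic"],
                partition["partition_id"], partition["etag"])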
def test_overlapping_changes(self):
    """
    Check that while a movement is in flight, rules about
    overlapping operations are properly enforced.
    """
    self.start_redpanda(num_nodes=4)
    node_ids = {1, 2, 3, 4}

    # Create a topic with enough data that inter-node movement
    # will take a while.
    name = "movetest"
    spec = TopicSpec(name=name, partition_count=1, replication_factor=3)
    self.client().create_topic(spec)

    # Wait for the partition to have a leader (`rpk produce` errors
    # out if it tries to write data before this)
    def partition_ready():
        return KafkaCat(self.redpanda).get_partition_leader(name,
                                                            0)[0] is not None

    wait_until(partition_ready, timeout_sec=10, backoff_sec=0.5)

    # Write a substantial amount of data to the topic
    msg_size = 512 * 1024
    write_bytes = 512 * 1024 * 1024
    producer = RpkProducer(self._ctx,
                           self.redpanda,
                           name,
                           msg_size=msg_size,
                           msg_count=int(write_bytes / msg_size))
    t1 = time.time()
    producer.start()

    # This is an absurdly low expected throughput, but necessarily
    # so to run reliably on current test runners, which share an EBS
    # backend among many parallel tests. 10MB/s has been empirically
    # shown to be too high an expectation.
    expect_bps = 1 * 1024 * 1024
    expect_runtime = write_bytes / expect_bps
    producer.wait(timeout_sec=expect_runtime)

    self.logger.info(
        f"Write complete: {write_bytes} bytes in {time.time() - t1} seconds")

    # - The Admin API redirects writes but not reads. Because we want
    #   synchronous status after submitting operations, send all operations
    #   to the controller leader. This is not necessary for the operations to
    #   work, it just simplifies this test by letting it see synchronous
    #   status updates.
    # - Because we will later verify that a 503 is sent in response to
    #   a move request on an in_progress topic, set retry_codes=[] to
    #   disable the default retries on 503.
    admin_node = self.redpanda.controller()
    admin = Admin(self.redpanda, default_node=admin_node, retry_codes=[])

    # Start an inter-node move, which should take some time
    # to complete because of recovery network traffic
    assignments = self._get_assignments(admin, name, 0)
    new_node = list(node_ids - set([a['node_id'] for a in assignments]))[0]
    self.logger.info(f"old assignments: {assignments}")
    old_assignments = assignments
    assignments = assignments[1:] + [{'node_id': new_node, 'core': 0}]
    self.logger.info(f"new assignments: {assignments}")
    r = admin.set_partition_replicas(name, 0, assignments)
    r.raise_for_status()
    assert admin.get_partitions(name, 0)['status'] == "in_progress"

    # Another move should fail while the first one is still in progress
    assert admin.get_partitions(name, 0)['status'] == "in_progress"
    try:
        r = admin.set_partition_replicas(name, 0, old_assignments)
    except requests.exceptions.HTTPError as e:
        assert e.response.status_code == 503
    else:
        raise RuntimeError(f"Expected 503 but got {r.status_code}")

    # An update to partition properties should succeed
    # (issue https://github.com/vectorizedio/redpanda/issues/2300)
    rpk = RpkTool(self.redpanda)
    assert admin.get_partitions(name, 0)['status'] == "in_progress"
    rpk.alter_topic_config(name, "retention.ms", "3600000")

    # A deletion should succeed
    assert name in rpk.list_topics()
    assert admin.get_partitions(name, 0)['status'] == "in_progress"
    rpk.delete_topic(name)
    assert name not in rpk.list_topics()
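# test_overlapping_changes and test_deletion_stops_move both call
# self._get_assignments, which is not shown in this section. A sketch of what
# such a helper could look like follows; it assumes the admin partition
# payload carries a 'replicas' list of {'node_id': ..., 'core': ...} entries,
# matching the assignment dicts the tests construct.
def _get_assignments(self, admin, topic, partition):
    # Sketch under the assumption described above, not the original helper.
    replicas = admin.get_partitions(topic, partition)['replicas']
    return [{'node_id': r['node_id'], 'core': r['core']} for r in replicas]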
def test_recreated_topic_metadata_are_valid(self, replication_factor):
    """
    Test that metadata of a recreated topic are valid across all the nodes
    """
    topic = 'tp-test'
    partition_count = 5
    rpk = RpkTool(self.redpanda)
    kcat = KafkaCat(self.redpanda)
    admin = Admin(self.redpanda)

    # create the topic with the requested replication factor
    rpk.create_topic(topic=topic,
                     partitions=partition_count,
                     replicas=replication_factor)

    def wait_for_leader(partition, expected_leader):
        leader, _ = kcat.get_partition_leader(topic, partition)
        return leader == expected_leader

    def transfer_all_leaders():
        partitions = rpk.describe_topic(topic)
        for p in partitions:
            replicas = set(p.replicas)
            replicas.remove(p.leader)
            target = random.choice(list(replicas))
            admin.partition_transfer_leadership("kafka", topic, p.id, target)
            wait_until(lambda: wait_for_leader(p.id, target),
                       timeout_sec=30,
                       backoff_sec=1)

    # produce some data to the topic
    msg_cnt = 100
    producer = RpkProducer(self.test_context,
                           self.redpanda,
                           topic,
                           16384,
                           msg_cnt,
                           acks=-1)
    producer.start()
    producer.wait()
    producer.free()

    # transfer leadership to grow the term
    for i in range(0, 10):
        transfer_all_leaders()

    # recreate the topic
    rpk.delete_topic(topic)
    rpk.create_topic(topic=topic, partitions=partition_count, replicas=3)

    def metadata_consistent():
        # validate leadership information on each node
        for p in range(0, partition_count):
            leaders = set()
            for n in self.redpanda.nodes:
                admin_partition = admin.get_partitions(topic=topic,
                                                       partition=p,
                                                       namespace="kafka",
                                                       node=n)
                self.logger.info(
                    f"node: {n.account.hostname} partition: {admin_partition}")
                leaders.add(admin_partition['leader_id'])
            self.logger.info(f"{topic}/{p} leaders: {leaders}")
            if len(leaders) != 1:
                return False
        return True

    wait_until(metadata_consistent, timeout_sec=45, backoff_sec=2)
def test_deletion_stops_move(self):
    """
    Delete a topic whose partitions are being moved and check the status
    after the topic is created again; old move operations should not
    influence the newly created topic.
    """
    self.start_redpanda(num_nodes=3)

    # create a single topic with replication factor of 1
    topic = 'test-topic'
    rpk = RpkTool(self.redpanda)
    rpk.create_topic(topic, 1, 1)
    partition = 0
    num_records = 1000

    self.logger.info(f"Producing to {topic}")
    producer = KafProducer(self.test_context, self.redpanda, topic,
                           num_records)
    producer.start()
    self.logger.info(
        f"Finished producing to {topic}, waiting for producer...")
    producer.wait()
    producer.free()
    self.logger.info("Producer stop complete.")

    admin = Admin(self.redpanda)
    # get current assignments
    assignments = self._get_assignments(admin, topic, partition)
    assert len(assignments) == 1
    self.logger.info(f"assignments for {topic}-{partition}: {assignments}")
    brokers = admin.get_brokers()
    self.logger.info(f"available brokers: {brokers}")
    candidates = list(
        filter(lambda b: b['node_id'] != assignments[0]['node_id'], brokers))
    replacement = random.choice(candidates)
    target_assignment = [{'node_id': replacement['node_id'], 'core': 0}]
    self.logger.info(
        f"target assignments for {topic}-{partition}: {target_assignment}")

    # shut down the target node to make sure that the move never completes
    node = self.redpanda.get_node(replacement['node_id'])
    self.redpanda.stop_node(node)

    admin.set_partition_replicas(topic, partition, target_assignment)

    # check that the status is in progress
    def get_status():
        partition_info = admin.get_partitions(topic, partition)
        self.logger.info(
            f"current assignments for {topic}-{partition}: {partition_info}")
        return partition_info["status"]

    wait_until(lambda: get_status() == 'in_progress', 10, 1)
    # delete the topic
    rpk.delete_topic(topic)
    # start the node back up
    self.redpanda.start_node(node)
    # create the topic again
    rpk.create_topic(topic, 1, 1)
    wait_until(lambda: get_status() == 'done', 10, 1)