def test_retention_archival_coordination(self):
    """
    Test that only archived segments can be evicted and that eviction
    restarts once the segments have been archived.
    """
    self.kafka_tools.alter_topic_config(
        self.topic,
        {
            TopicSpec.PROPERTY_RETENTION_BYTES: 5 * self.log_segment_size,
        },
    )

    with firewall_blocked(self.redpanda.nodes, self._get_s3_endpoint_ip()):
        produce_until_segments(redpanda=self.redpanda,
                               topic=self.topic,
                               partition_idx=0,
                               count=10)

        # Sleep long enough for log eviction to happen under normal
        # conditions, then check that no segment has been evicted
        # (because we can't upload segments to the cloud storage).
        time.sleep(3 * self.log_compaction_interval_ms / 1000.0)
        counts = list(
            segments_count(self.redpanda, self.topic, partition_idx=0))
        self.logger.info(f"node segment counts: {counts}")
        assert len(counts) == len(self.redpanda.nodes)
        assert all(c >= 10 for c in counts)

    # Check that eviction restarts after we restore the connection to
    # cloud storage.
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.topic,
                              partition_idx=0,
                              count=6)

def test_fetch_after_committed_offset_was_removed(self,
                                                  transactions_enabled):
    """
    Test fetching when the consumer offset was deleted by retention
    """
    self.redpanda._extra_rp_conf[
        "enable_transactions"] = transactions_enabled
    self.redpanda._extra_rp_conf[
        "enable_idempotence"] = transactions_enabled
    self.redpanda.start()

    topic = TopicSpec(partition_count=1,
                      replication_factor=3,
                      cleanup_policy=TopicSpec.CLEANUP_DELETE)
    self.client().create_topic(topic)

    kafka_tools = KafkaCliTools(self.redpanda)

    # produce until 10 segments exist on the partition
    produce_until_segments(
        self.redpanda,
        topic=topic.name,
        partition_idx=0,
        count=10,
    )

    consumer_group = 'test'
    rpk = RpkTool(self.redpanda)

    def consume(n=1):
        out = rpk.consume(topic.name, group=consumer_group, n=n)
        split = out.split('}')
        split = filter(lambda s: "{" in s, split)
        return map(lambda s: json.loads(s + "}"), split)

    # consume from the beginning
    msgs = consume(10)
    last = list(msgs).pop()
    offset = last['offset']

    # change retention bytes
    kafka_tools.alter_topic_config(
        topic.name, {
            TopicSpec.PROPERTY_RETENTION_BYTES: 2 * self.segment_size,
        })
    wait_for_segments_removal(self.redpanda,
                              topic.name,
                              partition_idx=0,
                              count=5)

    partitions = list(rpk.describe_topic(topic.name))
    p = partitions[0]
    assert p.start_offset > offset

    # consume from an offset that no longer exists: the one that was
    # committed previously has already been removed
    out = list(consume(1))
    assert out[0]['offset'] == p.start_offset

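# Standalone illustration of the parsing trick in consume() above: rpk prints
# one JSON object per consumed record, so splitting on '}' and re-appending
# the brace yields parseable objects. The sample output is made up, and the
# trick assumes no '}' characters appear inside keys or values.
import json

sample = '{"topic": "test", "offset": 0}\n{"topic": "test", "offset": 1}\n'
objects = [json.loads(s + "}") for s in sample.split('}') if "{" in s]
assert [o['offset'] for o in objects] == [0, 1]
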
def test_consume_from_blocked_s3(self):
    produce_until_segments(redpanda=self.redpanda,
                           topic=self.s3_topic_name,
                           partition_idx=0,
                           count=5,
                           acks=-1)

    self.rpk.alter_topic_config(self.s3_topic_name,
                                TopicSpec.PROPERTY_RETENTION_BYTES,
                                self.retention_bytes)
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.s3_topic_name,
                              partition_idx=0,
                              count=4)

    # Disconnect redpanda from S3 and try to read starting with offset 0
    with firewall_blocked(self.redpanda.nodes, self._s3_port):
        try:
            out = self.rpk.consume(topic=self.s3_topic_name)
        except RpkException as e:
            assert 'timed out' in e.msg
        else:
            raise RuntimeError(
                f"RPK consume should have timed out, but ran with output: {out}"
            )

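# A minimal, standalone sketch of the firewall_blocked idea used above: a
# context manager that installs a DROP rule for a port on entry and removes
# it on exit. The helper name and the exact iptables invocation are
# assumptions for illustration; the real helper may differ. node.account.ssh
# is ducktape's remote-execution helper.
from contextlib import contextmanager


@contextmanager
def blocked_port(nodes, port):
    rule = f"OUTPUT -p tcp --dport {port} -j DROP"
    for node in nodes:
        node.account.ssh(f"iptables -A {rule}")
    try:
        yield
    finally:
        for node in nodes:
            node.account.ssh(f"iptables -D {rule}")
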
def test_changing_topic_retention(self, property, acks):
    """
    Test changing topic retention for topics with data produced with
    ACKS=1 and ACKS=-1. This test produces data until 10 segments appear,
    then it changes the retention topic property and waits for segments
    to be removed.
    """
    kafka_tools = KafkaCliTools(self.redpanda)

    # produce until 10 segments exist on the partition
    produce_until_segments(
        self.redpanda,
        topic=self.topic,
        partition_idx=0,
        count=10,
        acks=acks,
    )

    # change the retention property
    kafka_tools.alter_topic_config(self.topic, {
        property: 10000,
    })

    wait_for_segments_removal(self.redpanda,
                              self.topic,
                              partition_idx=0,
                              count=5)

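# A minimal sketch of how a wait such as wait_for_segments_removal can be
# built from segments_count and ducktape's wait_until. The helper name,
# timeout and backoff are assumptions for illustration, not the real
# implementation.
from ducktape.utils.util import wait_until


def wait_for_segment_count_at_most(redpanda, topic, partition_idx, count):
    def _done():
        counts = list(
            segments_count(redpanda, topic, partition_idx=partition_idx))
        return all(c <= count for c in counts)

    wait_until(_done,
               timeout_sec=120,
               backoff_sec=2,
               err_msg=f"{topic}/{partition_idx} not reduced to <= {count} segments")
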
def test_write(self):
    """Write at least 10 segments, set the retention policy to leave only
    5 segments, wait for segment removal, then consume the data and
    validate that everything that was acked is consumed."""
    self.start_producer()
    produce_until_segments(
        redpanda=self.redpanda,
        topic=self.topic,
        partition_idx=0,
        count=10,
    )

    self.kafka_tools.alter_topic_config(
        self.topic,
        {
            TopicSpec.PROPERTY_RETENTION_BYTES:
            5 * EndToEndShadowIndexingTest.segment_size,
        },
    )
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.topic,
                              partition_idx=0,
                              count=6)

    self.start_consumer()
    self.run_validation()

def test_write_with_node_failures(self):
    """Same write/retention/validation flow as test_write, but with random
    process kills injected while waiting for segment removal."""
    self.start_producer()
    produce_until_segments(
        redpanda=self.redpanda,
        topic=self.topic,
        partition_idx=0,
        count=10,
    )

    self.kafka_tools.alter_topic_config(
        self.topic,
        {
            TopicSpec.PROPERTY_RETENTION_BYTES:
            5 * EndToEndShadowIndexingTest.segment_size
        },
    )

    with random_process_kills(self.redpanda) as ctx:
        wait_for_segments_removal(redpanda=self.redpanda,
                                  topic=self.topic,
                                  partition_idx=0,
                                  count=6)

        self.start_consumer()
        self.run_validation()

    ctx.assert_actions_triggered()

def test_querying_remote_partitions(self):
    topic = TopicSpec(redpanda_remote_read=True, redpanda_remote_write=True)
    epoch_offsets = {}
    rpk = RpkTool(self.redpanda)
    self.client().create_topic(topic)
    rpk.alter_topic_config(topic.name, "redpanda.remote.read", 'true')
    rpk.alter_topic_config(topic.name, "redpanda.remote.write", 'true')

    def wait_for_topic():
        wait_until(lambda: len(list(rpk.describe_topic(topic.name))) > 0,
                   30,
                   backoff_sec=2)

    # restart the whole cluster 6 times to trigger term rolls
    for i in range(0, 6):
        wait_for_topic()
        produce_until_segments(
            redpanda=self.redpanda,
            topic=topic.name,
            partition_idx=0,
            count=2 * i,
        )
        res = list(rpk.describe_topic(topic.name))
        epoch_offsets[res[0].leader_epoch] = res[0].high_watermark
        self.redpanda.restart_nodes(self.redpanda.nodes)

    self.logger.info(f"leader epoch high watermarks: {epoch_offsets}")

    wait_for_topic()

    rpk.alter_topic_config(topic.name, TopicSpec.PROPERTY_RETENTION_BYTES,
                           OffsetForLeaderEpochArchivalTest.segment_size)
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=topic.name,
                              partition_idx=0,
                              count=7)

    kcl = KCL(self.redpanda)

    for epoch, offset in epoch_offsets.items():
        self.logger.info(f"querying partition epoch {epoch} end offsets")
        epoch_end_offset = kcl.offset_for_leader_epoch(
            topics=topic.name, leader_epoch=epoch)[0].epoch_end_offset
        self.logger.info(
            f"epoch {epoch} end_offset: {epoch_end_offset}, expected offset: {offset}"
        )
        assert epoch_end_offset == offset

def test_changing_topic_retention_with_restart(self):
    """
    Test changing topic retention for topics with data produced with
    ACKS=1 and ACKS=-1. This test produces data until 20 segments appear,
    then it repeatedly lowers the retention topic property and waits for
    segments to be removed.
    """
    segment_size = 1048576

    # produce until 20 segments exist on the partition
    produce_until_segments(
        self.redpanda,
        topic=self.topic,
        partition_idx=0,
        count=20,
        acks=-1,
    )

    # restart all nodes to force replicating the raft configuration
    self.redpanda.restart_nodes(self.redpanda.nodes)

    kafka_tools = KafkaCliTools(self.redpanda)
    # wait for the controller; alter configs doesn't have a retry loop
    kafka_tools.describe_topic(self.topic)

    # change retention bytes to preserve 15 segments
    self.client().alter_topic_configs(
        self.topic, {
            TopicSpec.PROPERTY_RETENTION_BYTES: 15 * segment_size,
        })
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.topic,
                              partition_idx=0,
                              count=16)

    # change retention bytes again to preserve 10 segments
    self.client().alter_topic_configs(
        self.topic, {
            TopicSpec.PROPERTY_RETENTION_BYTES: 10 * segment_size,
        })
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.topic,
                              partition_idx=0,
                              count=11)

    # change retention bytes again to preserve 4 segments
    self.client().alter_topic_configs(
        self.topic, {
            TopicSpec.PROPERTY_RETENTION_BYTES: 4 * segment_size,
        })
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.topic,
                              partition_idx=0,
                              count=5)

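# Worked example of the retention arithmetic above. Each step waits for one
# more segment than the retained byte budget allows; the extra segment is
# presumably the active head segment, which retention does not reclaim (an
# assumption about the helper's semantics, not taken from the source).
segment_size = 1048576
for retained_segments in (15, 10, 4):
    retention_bytes = retained_segments * segment_size
    expected_count = retention_bytes // segment_size + 1
    print(f"retention.bytes={retention_bytes} -> wait for <= {expected_count} segments")
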
def test_shadow_indexing_aborted_txs(self):
    """Check that messages belonging to an aborted transaction are not
    seen by clients when fetching from remote segments."""
    topic = self.topics[0]

    class Producer:
        def __init__(self, brokers, logger):
            self.keys = []
            self.cur_offset = 0
            self.brokers = brokers
            self.logger = logger
            self.num_aborted = 0
            self.reconnect()

        def reconnect(self):
            self.producer = ck.Producer({
                'bootstrap.servers': self.brokers,
                'transactional.id': 'shadow-indexing-tx-test',
            })
            self.producer.init_transactions()

        def produce(self, topic):
            """Produce some messages inside a transaction with increasing
            keys and random values, then randomly commit or abort the
            transaction."""
            n_msgs = random.randint(50, 100)
            keys = []

            self.producer.begin_transaction()
            for _ in range(n_msgs):
                val = ''.join(
                    map(chr, (random.randint(0, 256)
                              for _ in range(random.randint(100, 1000)))))
                self.producer.produce(topic.name, val, str(self.cur_offset))
                keys.append(str(self.cur_offset).encode('utf8'))
                self.cur_offset += 1

            self.logger.info(
                f"writing {len(keys)} msgs: {keys[0]}-{keys[-1]}...")
            self.producer.flush()

            if random.random() < 0.1:
                self.producer.abort_transaction()
                self.num_aborted += 1
                self.logger.info("aborted txn")
            else:
                self.producer.commit_transaction()
                self.keys.extend(keys)

    producer = Producer(self.redpanda.brokers(), self.logger)

    def done():
        for _ in range(100):
            try:
                producer.produce(topic)
            except ck.KafkaException as err:
                self.logger.warn(f"producer error: {err}")
                producer.reconnect()
        self.logger.info("producer iteration complete")
        topic_partitions = segments_count(self.redpanda,
                                          topic.name,
                                          partition_idx=0)
        partitions = []
        for p in topic_partitions:
            partitions.append(p >= 10)
        return all(partitions)

    wait_until(done,
               timeout_sec=120,
               backoff_sec=1,
               err_msg="producing failed")
    assert producer.num_aborted > 0

    kafka_tools = KafkaCliTools(self.redpanda)
    kafka_tools.alter_topic_config(
        self.topic,
        {
            TopicSpec.PROPERTY_RETENTION_BYTES: 3 * self.segment_size,
        },
    )
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=self.topic,
                              partition_idx=0,
                              count=6)

    consumer = ck.Consumer(
        {
            'bootstrap.servers': self.redpanda.brokers(),
            'group.id': 'shadow-indexing-tx-test',
            'auto.offset.reset': 'earliest',
        },
        logger=self.logger)
    consumer.subscribe([topic.name])

    consumed = []
    while True:
        msgs = consumer.consume(timeout=5.0)
        if len(msgs) == 0:
            break
        consumed.extend([(m.key(), m.offset()) for m in msgs])

    first_mismatch = ''
    for p_key, (c_key, c_offset) in zip_longest(producer.keys, consumed):
        if p_key != c_key:
            first_mismatch = f"produced: {p_key}, consumed: {c_key} (offset: {c_offset})"
            break

    assert (not first_mismatch), (
        f"produced and consumed messages differ, "
        f"produced length: {len(producer.keys)}, consumed length: {len(consumed)}, "
        f"first mismatch: {first_mismatch}")

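# Note on the consumer above: records from aborted transactions are filtered
# out because librdkafka defaults to isolation.level=read_committed. A
# standalone sketch with the setting made explicit (broker address is a
# placeholder):
import confluent_kafka as ck

read_committed_consumer = ck.Consumer({
    'bootstrap.servers': 'localhost:9092',  # placeholder
    'group.id': 'shadow-indexing-tx-test',
    'auto.offset.reset': 'earliest',
    'isolation.level': 'read_committed',  # librdkafka default, explicit here
})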