class StreamsBounceTest(KafkaTest):
    """
    Bounce test of Kafka Streams: one smoke test client is aborted and
    restarted while the smoke test driver is producing records.
    """

    def __init__(self, test_context):
        # Every smoke test topic uses the same layout. The comprehension
        # builds a separate settings dict per topic, exactly like the
        # equivalent hand-written literal would.
        topic_names = ('echo', 'data', 'min', 'max', 'sum',
                       'dif', 'cnt', 'avg', 'wcnt', 'tagg')
        topic_specs = {
            topic_name: {'partitions': 5, 'replication-factor': 2}
            for topic_name in topic_names
        }
        super(StreamsBounceTest, self).__init__(test_context,
                                                num_zk=1,
                                                num_brokers=3,
                                                topics=topic_specs)

        self.driver = StreamsSmokeTestDriverService(test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)

    @cluster(num_nodes=6)
    def test_bounce(self):
        """
        Start a smoke test client, then abort (kill -9) and restart it a few times.
        Ensure that all records are delivered.
        """
        pause_secs = 15

        self.driver.start()
        self.processor1.start()

        time.sleep(pause_secs)
        self.processor1.abortThenRestart()
        time.sleep(pause_secs)

        # enable this after we add change log partition replicas
        #self.kafka.signal_leader("data")

        #time.sleep(15);

        self.processor1.abortThenRestart()

        self.driver.wait()
        self.driver.stop()
        self.processor1.stop()

        # The test passes only if the driver reported full delivery on stdout.
        driver_node = self.driver.node
        driver_node.account.ssh(
            "grep ALL-RECORDS-DELIVERED %s" % self.driver.STDOUT_FILE,
            allow_fail=False)
class StreamsSmokeTest(KafkaTest):
    """
    Smoke test of Kafka Streams: clients are started and cleanly stopped
    while the smoke test driver is running.
    """

    def __init__(self, test_context):
        # Every smoke test topic uses the same layout. The comprehension
        # builds a separate settings dict per topic, exactly like the
        # equivalent hand-written literal would.
        topic_names = ('echo', 'data', 'min', 'max', 'sum',
                       'dif', 'cnt', 'avg', 'wcnt', 'tagg')
        topic_specs = {
            topic_name: {'partitions': 5, 'replication-factor': 1}
            for topic_name in topic_names
        }
        super(StreamsSmokeTest, self).__init__(test_context,
                                               num_zk=1,
                                               num_brokers=3,
                                               topics=topic_specs)

        self.driver = StreamsSmokeTestDriverService(test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)
        self.processor2 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)
        self.processor3 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)
        self.processor4 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)

    @cluster(num_nodes=9)
    def test_streams(self):
        """
        Start a few smoke test clients, then repeat start a new one, stop (cleanly) running one a few times.
        Ensure that all results (stats on values computed by Kafka Streams) are correct.
        """
        pause_secs = 15

        self.driver.start()

        # Begin with two clients.
        self.processor1.start()
        self.processor2.start()
        time.sleep(pause_secs)

        # Swap the first client for a third one.
        self.processor3.start()
        self.processor1.stop()
        time.sleep(pause_secs)

        # Add a fourth client while the driver is still running.
        self.processor4.start()

        self.driver.wait()
        self.driver.stop()

        for still_running in (self.processor2, self.processor3, self.processor4):
            still_running.stop()

        # The test passes only if the driver printed SUCCESS on stdout.
        driver_node = self.driver.node
        driver_node.account.ssh("grep SUCCESS %s" % self.driver.STDOUT_FILE,
                                allow_fail=False)
class StreamsSmokeTest(KafkaTest):
    """
    Smoke test of Kafka Streams on a two-broker cluster: clients are started
    and cleanly stopped while the smoke test driver is running.
    """

    def __init__(self, test_context):
        # Every smoke test topic uses the same layout. The comprehension
        # builds a separate settings dict per topic, exactly like the
        # equivalent hand-written literal would.
        topic_names = ('echo', 'data', 'min', 'max', 'sum',
                       'dif', 'cnt', 'avg', 'wcnt', 'tagg')
        topic_specs = {
            topic_name: {'partitions': 5, 'replication-factor': 1}
            for topic_name in topic_names
        }
        super(StreamsSmokeTest, self).__init__(test_context,
                                               num_zk=1,
                                               num_brokers=2,
                                               topics=topic_specs)

        self.driver = StreamsSmokeTestDriverService(test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)
        self.processor2 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)
        self.processor3 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)
        self.processor4 = StreamsSmokeTestJobRunnerService(test_context, self.kafka)

    @cluster(num_nodes=8)
    def test_streams(self):
        """
        Start a few smoke test clients, then repeat start a new one, stop (cleanly) running one a few times.
        Ensure that all results (stats on values computed by Kafka Streams) are correct.
        """
        pause_secs = 15

        self.driver.start()

        # Begin with two clients.
        self.processor1.start()
        self.processor2.start()
        time.sleep(pause_secs)

        # Swap the first client for a third one.
        self.processor3.start()
        self.processor1.stop()
        time.sleep(pause_secs)

        # Add a fourth client while the driver is still running.
        self.processor4.start()

        self.driver.wait()
        self.driver.stop()

        for still_running in (self.processor2, self.processor3, self.processor4):
            still_running.stop()

        # The test passes only if the driver printed SUCCESS on stdout.
        driver_node = self.driver.node
        driver_node.account.ssh("grep SUCCESS %s" % self.driver.STDOUT_FILE,
                                allow_fail=False)
class StreamsBrokerBounceTest(Test):
    """
    Simple test of Kafka Streams with brokers failing
    """

    def __init__(self, test_context):
        super(StreamsBrokerBounceTest, self).__init__(test_context)
        self.replication = 3
        self.partitions = 3

        # All smoke test topics share one layout; each topic still gets its
        # own settings dict (the comprehension builds a fresh one per name).
        smoke_test_topics = ('echo', 'data', 'min', 'max', 'sum',
                             'dif', 'cnt', 'avg', 'wcnt', 'tagg')
        self.topics = {
            name: {'partitions': self.partitions,
                   'replication-factor': self.replication,
                   'configs': {"min.insync.replicas": 2}}
            for name in smoke_test_topics
        }
        # The offsets topic is declared explicitly with 50 partitions.
        self.topics['__consumer_offsets'] = {
            'partitions': 50,
            'replication-factor': self.replication,
            'configs': {"min.insync.replicas": 2}
        }

    def fail_broker_type(self, failure_mode, broker_type):
        """
        Pick a random topic and dispatch to the handler registered for
        ``failure_mode`` in the module-level ``failures`` table.

        :param failure_mode: key into ``failures``
        :param broker_type: passed through to the handler unchanged
        """
        # dict.keys() is a non-subscriptable view on Python 3, so build a
        # list before indexing; this behaves the same on Python 2.
        topic_names = list(self.topics.keys())
        topic_index = randint(0, len(topic_names) - 1)
        topic = topic_names[topic_index]
        failures[failure_mode](self, topic, broker_type)

    def fail_many_brokers(self, failure_mode, num_failures):
        """
        Signal the first brokers of the cluster.

        :param failure_mode: "clean_shutdown" sends SIGTERM; every other mode
                             sends SIGKILL
        :param num_failures: upper bound used for the broker index range
        """
        # NOTE(review): "clean_bounce" also ends up with SIGKILL here, and
        # nothing in this method restarts a broker -- confirm this is intended.
        sig = signal.SIGTERM if failure_mode == "clean_shutdown" else signal.SIGKILL

        # NOTE(review): this range signals num_failures - 1 brokers, not
        # num_failures. Left unchanged because the tests may rely on one
        # broker surviving -- confirm before "fixing" the bound.
        for num in range(0, num_failures - 1):
            signal_node(self, self.kafka.nodes[num], sig)

    def setup_system(self, start_processor=True):
        """
        Start ZooKeeper, Kafka and the smoke test driver.

        :param start_processor: when true, the streams client is started too
        """
        # Setup phase
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()

        self.kafka = KafkaService(self.test_context, num_nodes=self.replication,
                                  zk=self.zk, topics=self.topics)
        self.kafka.start()

        # Start test harness
        self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka)

        self.driver.start()

        if start_processor:
            self.processor1.start()

    def collect_results(self, sleep_time_secs):
        """
        Wait for the driver, stop driver and client, and gather the status
        lines found in their stdout files.

        :param sleep_time_secs: 0 selects the "client exception" marker,
                                any other value the "client closed" marker
        :return: dict with the last matching line for each of the keys
                 "Client closed", "Records Delivered" and
                 "Logic Success/Failure"
        """
        data = {}

        # End test
        self.driver.wait()
        self.driver.stop()

        self.processor1.stop()

        node = self.driver.node

        # Success is declared if streams does not crash when sleep time > 0
        # It should give an exception when sleep time is 0 since we kill the brokers immediately
        # and the topic manager cannot create internal topics with the desired replication factor
        if sleep_time_secs == 0:
            client_marker = "SMOKE-TEST-CLIENT-EXCEPTION"
        else:
            client_marker = "SMOKE-TEST-CLIENT-CLOSED"
        output_streams = self.processor1.node.account.ssh_capture(
            "grep %s %s" % (client_marker, self.processor1.STDOUT_FILE),
            allow_fail=False)
        for line in output_streams:
            data["Client closed"] = line

        # Currently it is hard to guarantee anything about Kafka since we don't have exactly once.
        # With exactly once in place, success will be defined as ALL-RECORDS-DELIVERED and SUCCESS
        output = node.account.ssh_capture(
            "grep -E 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED|PROCESSED-LESS-THAN-GENERATED' %s"
            % self.driver.STDOUT_FILE,
            allow_fail=False)
        for line in output:
            data["Records Delivered"] = line

        output = node.account.ssh_capture(
            "grep -E 'SUCCESS|FAILURE' %s" % self.driver.STDOUT_FILE,
            allow_fail=False)
        for line in output:
            data["Logic Success/Failure"] = line

        return data

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"],
            broker_type=["leader", "controller"],
            sleep_time_secs=[120])
    def test_broker_type_bounce(self, failure_mode, broker_type, sleep_time_secs):
        """
        Start a smoke test client, then kill one particular broker and ensure data is still received
        Record if records are delivered.
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(sleep_time_secs)

        # Fail brokers
        self.fail_broker_type(failure_mode, broker_type)

        return self.collect_results(sleep_time_secs)

    @ignore
    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown"],
            broker_type=["controller"],
            sleep_time_secs=[0])
    def test_broker_type_bounce_at_start(self, failure_mode, broker_type, sleep_time_secs):
        """
        Start a smoke test client, then kill one particular broker immediately before streams starts
        Streams should throw an exception since it cannot create topics with the desired
        replication factor of 3
        """
        self.setup_system(start_processor=False)

        # Sleep to allow test to run for a bit
        time.sleep(sleep_time_secs)

        # Fail brokers
        self.fail_broker_type(failure_mode, broker_type)

        self.processor1.start()

        return self.collect_results(sleep_time_secs)

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"],
            num_failures=[2])
    def test_many_brokers_bounce(self, failure_mode, num_failures):
        """
        Start a smoke test client, then kill a few brokers and ensure data is still received
        Record if records are delivered
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(120)

        # Fail brokers
        self.fail_many_brokers(failure_mode, num_failures)

        return self.collect_results(120)

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_bounce", "hard_bounce"],
            num_failures=[3])
    def test_all_brokers_bounce(self, failure_mode, num_failures):
        """
        Start a smoke test client, then kill a few brokers and ensure data is still received
        Record if records are delivered
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(120)

        # Fail brokers
        self.fail_many_brokers(failure_mode, num_failures)

        return self.collect_results(120)
def test_upgrade_downgrade_brokers(self, from_version, to_version): """ Start a smoke test client then perform rolling upgrades on the broker. """ if from_version == to_version: return self.replication = 3 self.num_kafka_nodes = 3 self.partitions = 1 self.isr = 2 self.topics = { 'echo': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'data': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'min': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'max': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'sum': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'dif': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'cnt': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'avg': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'wcnt': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'tagg': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } } } # Setup phase self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() # number of nodes needs to be >= 3 for the smoke test self.kafka = KafkaService(self.test_context, num_nodes=self.num_kafka_nodes, zk=self.zk, version=KafkaVersion(from_version), topics=self.topics) self.kafka.start() # allow some time for topics to be created wait_until(lambda: self.confirm_topics_on_all_brokers( 
set(self.topics.keys())), timeout_sec=60, err_msg="Broker did not create all topics in 60 seconds ") self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) processor = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka) with self.driver.node.account.monitor_log( self.driver.STDOUT_FILE) as driver_monitor: self.driver.start() with processor.node.account.monitor_log( processor.STDOUT_FILE) as monitor: processor.start() monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(processor.node)) connected_message = "Discovered group coordinator" with processor.node.account.monitor_log( processor.LOG_FILE) as log_monitor: with processor.node.account.monitor_log( processor.STDOUT_FILE) as stdout_monitor: self.perform_broker_upgrade(to_version) log_monitor.wait_until( connected_message, timeout_sec=120, err_msg=("Never saw output '%s' on " % connected_message) + str(processor.node.account)) stdout_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on" % self.processed_msg + str(processor.node.account)) # SmokeTestDriver allows up to 6 minutes to consume all # records for the verification step so this timeout is set to # 6 minutes (360 seconds) for consuming of verification records # and a very conservative additional 2 minutes (120 seconds) to process # the records in the verification step driver_monitor.wait_until( 'ALL-RECORDS-DELIVERED\|PROCESSED-MORE-THAN-GENERATED', timeout_sec=480, err_msg="Never saw output '%s' on" % 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED' + str(self.driver.node.account)) self.driver.stop() processor.stop() processor.node.account.ssh_capture("grep SMOKE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False)
class StreamsUpgradeTest(Test):
    """
    Tests rolling upgrades and downgrades of the Kafka Streams library.
    """

    def __init__(self, test_context):
        super(StreamsUpgradeTest, self).__init__(test_context)
        self.replication = 3
        self.partitions = 1
        self.isr = 2
        # All smoke test topics share one layout; the comprehension still
        # builds a separate settings dict for every topic.
        self.topics = {
            topic: {'partitions': self.partitions,
                    'replication-factor': self.replication,
                    'configs': {"min.insync.replicas": self.isr}}
            for topic in ('echo', 'data', 'min', 'max', 'sum',
                          'dif', 'cnt', 'avg', 'wcnt', 'tagg')
        }

    def perform_streams_upgrade(self, to_version):
        """Stop the streams client, switch its node to ``to_version`` and start it again."""
        self.logger.info("First pass bounce - rolling streams upgrade")

        # get the node running the streams app
        node = self.processor1.node
        self.processor1.stop()

        # change its version. This will automatically make it pick up a different
        # JAR when it starts again
        node.version = KafkaVersion(to_version)
        self.processor1.start()

    def perform_broker_upgrade(self, to_version):
        """Restart every broker, one after the other, on ``to_version``."""
        self.logger.info("First pass bounce - rolling broker upgrade")
        for node in self.kafka.nodes:
            self.kafka.stop_node(node)
            node.version = KafkaVersion(to_version)
            self.kafka.start_node(node)

    def _run_upgrade_scenario(self, from_version, to_version, perform_upgrade):
        """
        Shared body of the upgrade tests: bring up the cluster on
        ``from_version``, start driver and client, call
        ``perform_upgrade(to_version)`` and verify the output markers.
        """
        # Setup phase
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()

        # number of nodes needs to be >= 3 for the smoke test
        self.kafka = KafkaService(self.test_context, num_nodes=3,
                                  zk=self.zk, version=KafkaVersion(from_version), topics=self.topics)
        self.kafka.start()

        # allow some time for topics to be created
        time.sleep(10)

        self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka)

        self.driver.start()
        self.processor1.start()
        time.sleep(15)

        perform_upgrade(to_version)

        time.sleep(15)
        self.driver.wait()
        self.driver.stop()

        self.processor1.stop()

        node = self.driver.node
        node.account.ssh("grep ALL-RECORDS-DELIVERED %s" % self.driver.STDOUT_FILE, allow_fail=False)
        # Use the blocking ssh() so a missing marker actually fails the test:
        # the result of ssh_capture() was never consumed here, so the grep
        # exit status went unchecked.
        self.processor1.node.account.ssh("grep SMOKE-TEST-CLIENT-CLOSED %s" % self.processor1.STDOUT_FILE, allow_fail=False)

    @cluster(num_nodes=6)
    @parametrize(from_version=str(LATEST_0_10_1), to_version=str(DEV_BRANCH))
    @parametrize(from_version=str(LATEST_0_10_2), to_version=str(DEV_BRANCH))
    @parametrize(from_version=str(LATEST_0_10_1), to_version=str(LATEST_0_11_0))
    @parametrize(from_version=str(LATEST_0_10_2), to_version=str(LATEST_0_11_0))
    @parametrize(from_version=str(LATEST_0_11_0), to_version=str(LATEST_0_10_2))
    @parametrize(from_version=str(DEV_BRANCH), to_version=str(LATEST_0_10_2))
    def test_upgrade_downgrade_streams(self, from_version, to_version):
        """
        Start a smoke test client, then stop it and restart it on a different
        Streams version. Ensure that all records are delivered.

        Note, that just like tests/core/upgrade_test.py, a prerequisite for this test to succeed
        is the inclusion of all parametrized versions of kafka in kafka/vagrant/base.sh
        (search for get_kafka()). For streams in particular, that means that someone has manually
        copied the kafka-stream-$version-test.jar in the right S3 bucket as shown in base.sh.
        """
        self._run_upgrade_scenario(from_version, to_version, self.perform_streams_upgrade)

    @cluster(num_nodes=6)
    @parametrize(from_version=str(LATEST_0_10_2), to_version=str(DEV_BRANCH))
    def test_upgrade_brokers(self, from_version, to_version):
        """
        Start a smoke test client then perform rolling upgrades on the broker.
        """
        self._run_upgrade_scenario(from_version, to_version, self.perform_broker_upgrade)
class StreamsUpgradeTest(Test):
    """
    Test upgrading Kafka Streams (all version combinations)
    If metadata was changed, upgrade is more difficult
    Metadata version was bumped in 0.10.1.0 and
    subsequently bumped in 2.0.0
    """

    def __init__(self, test_context):
        super(StreamsUpgradeTest, self).__init__(test_context)
        self.topics = {
            'echo': {'partitions': 5},
            'data': {'partitions': 5},
        }

    # Regex matched against client stdout to confirm records are being processed.
    processed_msg = "processed [0-9]* records"
    # DEV_VERSION with everything after the first "-" removed.
    base_version_number = str(DEV_VERSION).split("-")[0]

    def perform_broker_upgrade(self, to_version):
        """Restart every broker, one after the other, on ``to_version``."""
        self.logger.info("First pass bounce - rolling broker upgrade")
        for node in self.kafka.nodes:
            self.kafka.stop_node(node)
            node.version = KafkaVersion(to_version)
            self.kafka.start_node(node)

    @cluster(num_nodes=6)
    @matrix(from_version=smoke_test_versions, to_version=dev_version, bounce_type=["full"])
    def test_app_upgrade(self, from_version, to_version, bounce_type):
        """
        Starts 3 KafkaStreams instances with <old_version>, and moves them to <new_version>:
        one-by-one for bounce_type "rolling", all together for bounce_type "full".
        """
        if from_version == to_version:
            return

        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()

        topic_names = ('echo', 'data', 'min', 'min-suppressed', 'min-raw', 'max', 'sum',
                       'sws-raw', 'sws-suppressed', 'dif', 'cnt', 'avg', 'wcnt', 'tagg')
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics={
            name: {'partitions': 5, 'replication-factor': 1} for name in topic_names
        })
        self.kafka.start()

        self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka)
        self.driver.disable_auto_terminate()
        self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka, processing_guarantee="at_least_once", replication_factor=1)
        self.processor2 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka, processing_guarantee="at_least_once", replication_factor=1)
        self.processor3 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka, processing_guarantee="at_least_once", replication_factor=1)

        for p in (self.processor1, self.processor2, self.processor3):
            self.purge_state_dir(p)

        self.driver.start()
        self.start_all_nodes_with(from_version)

        self.processors = [self.processor1, self.processor2, self.processor3]

        if bounce_type == "rolling":
            counter = 1
            random.seed()
            # upgrade one-by-one via rolling bounce
            random.shuffle(self.processors)
            for p in self.processors:
                p.CLEAN_NODE_ENABLED = False
                self.do_stop_start_bounce(p, None, to_version, counter)
                counter = counter + 1
        elif bounce_type == "full":
            self.restart_all_nodes_with(to_version)
        else:
            raise Exception("Unrecognized bounce_type: " + str(bounce_type))

        # shutdown
        self.driver.stop()

        # Ideally, we would actually verify the expected results.
        # See KAFKA-10202

        random.shuffle(self.processors)
        for p in self.processors:
            node = p.node
            with node.account.monitor_log(p.STDOUT_FILE) as monitor:
                p.stop()
                monitor.wait_until("SMOKE-TEST-CLIENT-CLOSED",
                                   timeout_sec=60,
                                   err_msg="Never saw output 'SMOKE-TEST-CLIENT-CLOSED' on " + str(node.account))

    def _wait_until_all_processing(self, version):
        """
        Wait, for processor1..3 in that order, until each one logged the
        expected version string, then reported it started, then reported
        processed records.
        """
        processors = (self.processor1, self.processor2, self.processor3)

        # double-check the version
        kafka_version_str = self.get_version_string(version)
        for p in processors:
            self.wait_for_verification(p, kafka_version_str, p.LOG_FILE)

        # wait for the members to join
        for p in processors:
            self.wait_for_verification(p, "SMOKE-TEST-CLIENT-STARTED", p.STDOUT_FILE)

        # make sure they've processed something
        for p in processors:
            self.wait_for_verification(p, self.processed_msg, p.STDOUT_FILE)

    def start_all_nodes_with(self, version):
        """Start processor1..3 on ``version`` and wait until all are processing."""
        processors = (self.processor1, self.processor2, self.processor3)

        for p in processors:
            self.set_version(p, version)
        for p in processors:
            p.start()

        self._wait_until_all_processing(version)

    def restart_all_nodes_with(self, version):
        """Stop processor1..3, roll their files, and restart all of them on ``version``."""
        processors = (self.processor1, self.processor2, self.processor3)

        for p in processors:
            p.stop_node(p.node)

        # make sure the members have stopped
        for p in processors:
            self.wait_for_verification(p, "SMOKE-TEST-CLIENT-CLOSED", p.STDOUT_FILE)

        for p in processors:
            self.roll_logs(p, ".1-1")
        for p in processors:
            self.set_version(p, version)
        for p in processors:
            p.start_node(p.node)

        self._wait_until_all_processing(version)

    def get_version_string(self, version):
        """
        Return the grep pattern expected in the client log for ``version``.

        Versions starting with "0", "1", "2.0" or "2.1" use the
        "Kafka version : " spelling; SNAPSHOT versions match on
        base_version_number; everything else uses "Kafka version: ".
        """
        if version.startswith(("0", "1", "2.0", "2.1")):
            return "Kafka version : " + version
        elif "SNAPSHOT" in version:
            return "Kafka version.*" + self.base_version_number + ".*SNAPSHOT"
        else:
            return "Kafka version: " + version

    def wait_for_verification(self, processor, message, file, num_lines=1):
        """
        Block (up to 60 seconds) until ``file`` on the processor's node has at
        least ``num_lines`` lines matching ``message``.
        """
        wait_until(lambda: self.verify_from_file(processor, message, file) >= num_lines,
                   timeout_sec=60, err_msg="Did expect to read '%s' from %s" % (message, processor.node.account))

    def verify_from_file(self, processor, message, file):
        """
        Count the lines of ``file`` on the processor's node matching the
        extended regex ``message``.

        :return: the count, or 0 if the command output is not an integer
        """
        result = processor.node.account.ssh_output("grep -E '%s' %s | wc -l" % (message, file), allow_fail=False)
        try:
            return int(result)
        except ValueError:
            # Logger.warn is deprecated; %-style arguments also avoid a
            # str + bytes concatenation error should the output be bytes.
            self.logger.warning("Command failed with ValueError: %s", result)
            return 0

    def set_version(self, processor, version):
        """Set the processor's version; DEV_VERSION is mapped to the empty string."""
        if version == str(DEV_VERSION):
            processor.set_version("")  # set to TRUNK
        else:
            processor.set_version(version)

    def purge_state_dir(self, processor):
        """Remove the processor's PERSISTENT_ROOT directory on its node."""
        processor.node.account.ssh("rm -rf " + processor.PERSISTENT_ROOT, allow_fail=False)

    def do_stop_start_bounce(self, processor, upgrade_from, new_version, counter):
        """
        Bounce a single processor onto ``new_version`` while checking that the
        other two processors keep processing.

        :param processor: the processor to stop, re-version and start again
        :param upgrade_from: forwarded to processor.set_upgrade_from(); None
                             also selects the ".1-" roll suffix
        :param new_version: version the processor is restarted with
        :param counter: number appended to the roll suffix
        :raises Exception: if one of the other processors logged the
                           "unable to decode subscription data" error
        """
        kafka_version_str = self.get_version_string(new_version)

        first_other_processor = None
        second_other_processor = None
        for p in self.processors:
            if p != processor:
                if first_other_processor is None:
                    first_other_processor = p
                else:
                    second_other_processor = p

        node = processor.node
        first_other_node = first_other_processor.node
        second_other_node = second_other_processor.node

        # stop processor and wait for rebalance of others
        with first_other_node.account.monitor_log(first_other_processor.STDOUT_FILE) as first_other_monitor:
            with second_other_node.account.monitor_log(second_other_processor.STDOUT_FILE) as second_other_monitor:
                processor.stop_node(processor.node)
                first_other_monitor.wait_until(self.processed_msg,
                                               timeout_sec=60,
                                               err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account))
                second_other_monitor.wait_until(self.processed_msg,
                                                timeout_sec=60,
                                                err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account))
        # Use the blocking ssh() so a missing marker actually fails the test:
        # the result of ssh_capture() was never consumed here, so the grep
        # exit status went unchecked.
        node.account.ssh("grep SMOKE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False)

        if upgrade_from is None:  # upgrade disabled -- second round of rolling bounces
            roll_counter = ".1-"  # second round of rolling bounces
        else:
            roll_counter = ".0-"  # first round of rolling bounces

        self.roll_logs(processor, roll_counter + str(counter))

        self.set_version(processor, new_version)
        processor.set_upgrade_from(upgrade_from)

        grep_metadata_error = "grep \"org.apache.kafka.streams.errors.TaskAssignmentException: unable to decode subscription data: version=2\" "
        with node.account.monitor_log(processor.STDOUT_FILE) as monitor:
            with node.account.monitor_log(processor.LOG_FILE) as log_monitor:
                with first_other_node.account.monitor_log(first_other_processor.STDOUT_FILE) as first_other_monitor:
                    with second_other_node.account.monitor_log(second_other_processor.STDOUT_FILE) as second_other_monitor:
                        processor.start_node(processor.node)

                        log_monitor.wait_until(kafka_version_str,
                                               timeout_sec=60,
                                               err_msg="Could not detect Kafka Streams version " + new_version + " on " + str(node.account))

                        # For each of the other two processors in turn: wait
                        # until it is processing again, then make sure it did
                        # not log the metadata decoding error.
                        others = ((first_other_monitor, first_other_processor, first_other_node),
                                  (second_other_monitor, second_other_processor, second_other_node))
                        for other_monitor, other_processor, other_node in others:
                            other_monitor.wait_until(self.processed_msg,
                                                     timeout_sec=60,
                                                     err_msg="Never saw output '%s' on " % self.processed_msg + str(other_node.account))
                            found = list(other_node.account.ssh_capture(grep_metadata_error + other_processor.STDERR_FILE, allow_fail=True))
                            if len(found) > 0:
                                raise Exception("Kafka Streams failed with 'unable to decode subscription data: version=2'")

                        monitor.wait_until(self.processed_msg,
                                           timeout_sec=60,
                                           err_msg="Never saw output '%s' on " % self.processed_msg + str(node.account))

    def roll_logs(self, processor, roll_suffix):
        """Rename the processor's stdout, stderr, log and config files by appending ``roll_suffix``."""
        rolled_files = (processor.STDOUT_FILE, processor.STDERR_FILE,
                        processor.LOG_FILE, processor.CONFIG_FILE)
        for path in rolled_files:
            processor.node.account.ssh("mv " + path + " " + path + roll_suffix,
                                       allow_fail=False)
def test_streams(self, processing_guarantee, crash, metadata_quorum=quorum.zk):
    """
    Run three smoke test processors one after another against one driver.

    The first two processors are each started, watched until they report
    processing, and then stopped with stop_nodes(not crash). The third one
    keeps running until the driver has finished, after which the driver's
    stdout is grepped for the expected verdict.

    :param processing_guarantee: passed to every StreamsSmokeTestJobRunnerService;
        also decides which verdicts are acceptable after a crash
    :param crash: stop_nodes() receives ``not crash`` (presumably a
        "clean shutdown" flag -- confirm against the service implementation)
    :param metadata_quorum: not referenced in this body; presumably consumed
        by the test framework -- confirm
    """
    running_marker = 'REBALANCING -> RUNNING'
    no_running_msg = "Never saw 'REBALANCING -> RUNNING' message "
    no_processing_msg = "Didn't see any processing messages "

    def driver_grep(command_template):
        # Run a grep against the driver's stdout file with allow_fail=False.
        self.driver.node.account.ssh(command_template % self.driver.STDOUT_FILE, allow_fail=False)

    def await_output(log_monitor, runner, text, wait_secs, failure_prefix):
        # Block until `text` shows up in the monitored log of `runner`.
        log_monitor.wait_until(text,
                               timeout_sec=wait_secs,
                               err_msg=failure_prefix + str(runner.node.account))

    runners = [StreamsSmokeTestJobRunnerService(self.test_context, self.kafka, processing_guarantee)
               for _ in range(3)]
    first, second, third = runners

    with first.node.account.monitor_log(first.STDOUT_FILE) as first_log:
        first.start()
        await_output(first_log, first, running_marker, 60, no_running_msg)

        # The driver is only started once the first processor is running.
        self.driver.start()

        await_output(first_log, first, 'processed', 30, no_processing_msg)

    # make sure we're not already done processing (which would invalidate the test)
    driver_grep("! grep 'Result Verification' %s")

    first.stop_nodes(not crash)

    with second.node.account.monitor_log(second.STDOUT_FILE) as second_log:
        second.start()
        await_output(second_log, second, running_marker, 120, no_running_msg)
        await_output(second_log, second, 'processed', 30, no_processing_msg)

    # make sure we're not already done processing (which would invalidate the test)
    driver_grep("! grep 'Result Verification' %s")

    second.stop_nodes(not crash)

    with third.node.account.monitor_log(third.STDOUT_FILE) as third_log:
        third.start()
        await_output(third_log, third, running_marker, 120, no_running_msg)
        # there should still be some data left for this processor to work on.
        await_output(third_log, third, 'processed', 30, no_processing_msg)

    self.driver.wait()
    self.driver.stop()

    third.stop()

    # After a crash under at_least_once, processing more records than were
    # generated is accepted in addition to plain SUCCESS.
    if crash and processing_guarantee == 'at_least_once':
        verdict_command = "grep -E 'SUCCESS|PROCESSED-MORE-THAN-GENERATED' %s"
    else:
        verdict_command = "grep SUCCESS %s"
    driver_grep(verdict_command)
class StreamsBrokerBounceTest(Test):
    """
    Simple test of Kafka Streams with brokers failing
    """

    def __init__(self, test_context):
        super(StreamsBrokerBounceTest, self).__init__(test_context)
        self.replication = 3
        self.partitions = 3

        # All smoke test topics share the same layout; insertion order is kept
        # identical to the previous hand-written literal.
        smoke_test_topics = ['echo', 'data', 'min', 'max', 'sum', 'dif', 'cnt', 'avg', 'wcnt', 'tagg']
        self.topics = {
            name: {'partitions': self.partitions,
                   'replication-factor': self.replication,
                   'configs': {"min.insync.replicas": 2}}
            for name in smoke_test_topics
        }
        self.topics['__consumer_offsets'] = {'partitions': 50,
                                             'replication-factor': self.replication,
                                             'configs': {"min.insync.replicas": 2}}

    def fail_broker_type(self, failure_mode, broker_type):
        """
        Pick a random topic and apply the given failure to one of its brokers.

        :param failure_mode: key into the module-level ``failures`` table
        :param broker_type: forwarded unchanged to the selected failure function
        """
        # dict.keys() is a view on Python 3 and cannot be indexed, so the keys
        # are materialized into a list before picking one.
        topic_names = list(self.topics.keys())
        topic = topic_names[randint(0, len(topic_names) - 1)]
        failures[failure_mode](self, topic, broker_type)

    def fail_many_brokers(self, failure_mode, num_failures):
        """
        Send a signal to the first brokers of the cluster.

        :param failure_mode: "clean_shutdown" sends SIGTERM, anything else SIGKILL
        :param num_failures: ``num_failures - 1`` brokers are signalled
        """
        # NOTE(review): every mode other than "clean_shutdown" (including
        # "clean_bounce") results in SIGKILL, and no restart is issued here --
        # confirm this is intended.
        sig = signal.SIGTERM if failure_mode == "clean_shutdown" else signal.SIGKILL

        # NOTE(review): the upper bound is num_failures - 1, so one broker fewer
        # than requested is signalled -- confirm this is intended.
        for num in range(0, num_failures - 1):
            signal_node(self, self.kafka.nodes[num], sig)

    def confirm_topics_on_all_brokers(self, expected_topic_set):
        """
        Check whether every broker lists all of the expected topics.

        :param expected_topic_set: set of topic names that must be present
        :return: True if each broker reports every expected topic, else False
        """
        for node in self.kafka.nodes:
            # kafka.list_topics() returns a python generator, so values are
            # fetched lazily and cannot be compared directly; count the
            # expected topics while iterating over what is returned.
            match_count = sum(1 for topic in self.kafka.list_topics(node=node)
                              if topic in expected_topic_set)

            if len(expected_topic_set) != match_count:
                return False

        return True

    def setup_system(self, start_processor=True):
        """
        Start ZooKeeper, Kafka, the smoke test driver and (optionally) the processor.

        :param start_processor: when False the processor service is created but
            not started, so the caller can start it later
        """
        # Setup phase
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()

        self.kafka = KafkaService(self.test_context, num_nodes=self.replication, zk=self.zk, topics=self.topics)
        self.kafka.start()

        # allow some time for topics to be created
        wait_until(lambda: self.confirm_topics_on_all_brokers(set(self.topics.keys())),
                   timeout_sec=60,
                   err_msg="Broker did not create all topics in 60 seconds ")

        # Start test harness
        self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka)

        self.driver.start()

        if start_processor:
            self.processor1.start()

    def collect_results(self, sleep_time_secs):
        """
        Stop the driver and processor and gather result lines from their output.

        :param sleep_time_secs: 0 means the brokers were failed immediately, in
            which case the client is expected to have logged an exception
        :return: dict with the last matching line for "Client closed",
            "Records Delivered" and "Logic Success/Failure"
        """
        data = {}
        # End test
        self.driver.wait()
        self.driver.stop()

        self.processor1.stop()

        node = self.driver.node

        # Success is declared if streams does not crash when sleep time > 0
        # It should give an exception when sleep time is 0 since we kill the brokers immediately
        # and the topic manager cannot create internal topics with the desired replication factor
        if sleep_time_secs == 0:
            output_streams = self.processor1.node.account.ssh_capture(
                "grep SMOKE-TEST-CLIENT-EXCEPTION %s" % self.processor1.STDOUT_FILE, allow_fail=False)
        else:
            output_streams = self.processor1.node.account.ssh_capture(
                "grep SMOKE-TEST-CLIENT-CLOSED %s" % self.processor1.STDOUT_FILE, allow_fail=False)

        # Each loop below keeps only the last matching line.
        for line in output_streams:
            data["Client closed"] = line

        # Currently it is hard to guarantee anything about Kafka since we don't have exactly once.
        # With exactly once in place, success will be defined as ALL-RECORDS-DELIVERED and SUCCESS
        output = node.account.ssh_capture(
            "grep -E 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED|PROCESSED-LESS-THAN-GENERATED' %s"
            % self.driver.STDOUT_FILE, allow_fail=False)
        for line in output:
            data["Records Delivered"] = line
        output = node.account.ssh_capture("grep -E 'SUCCESS|FAILURE' %s" % self.driver.STDOUT_FILE, allow_fail=False)
        for line in output:
            data["Logic Success/Failure"] = line

        return data

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"],
            broker_type=["leader", "controller"],
            sleep_time_secs=[120])
    def test_broker_type_bounce(self, failure_mode, broker_type, sleep_time_secs):
        """
        Start a smoke test client, then kill one particular broker and ensure data is still received
        Record if records are delivered.
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(sleep_time_secs)

        # Fail brokers
        self.fail_broker_type(failure_mode, broker_type)

        return self.collect_results(sleep_time_secs)

    @ignore
    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown"],
            broker_type=["controller"],
            sleep_time_secs=[0])
    def test_broker_type_bounce_at_start(self, failure_mode, broker_type, sleep_time_secs):
        """
        Start a smoke test client, then kill one particular broker immediately before streams starts
        Streams should throw an exception since it cannot create topics with the desired
        replication factor of 3
        """
        self.setup_system(start_processor=False)

        # Sleep to allow test to run for a bit
        time.sleep(sleep_time_secs)

        # Fail brokers
        self.fail_broker_type(failure_mode, broker_type)

        self.processor1.start()

        return self.collect_results(sleep_time_secs)

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"],
            num_failures=[2])
    def test_many_brokers_bounce(self, failure_mode, num_failures):
        """
        Start a smoke test client, then kill a few brokers and ensure data is still received
        Record if records are delivered
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(120)

        # Fail brokers
        self.fail_many_brokers(failure_mode, num_failures)

        return self.collect_results(120)

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_bounce", "hard_bounce"],
            num_failures=[3])
    def test_all_brokers_bounce(self, failure_mode, num_failures):
        """
        Start a smoke test client, then kill a few brokers and ensure data is still received
        Record if records are delivered
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(120)

        # Fail brokers
        self.fail_many_brokers(failure_mode, num_failures)

        return self.collect_results(120)
class StreamsBrokerBounceTest(Test):
    """
    Simple test of Kafka Streams with brokers failing
    """

    def __init__(self, test_context):
        super(StreamsBrokerBounceTest, self).__init__(test_context)
        self.replication = 3
        self.partitions = 3

        # All smoke test topics share the same layout; insertion order is kept
        # identical to the previous hand-written literal.
        smoke_test_topics = ['echo', 'data', 'min', 'max', 'sum', 'dif', 'cnt', 'avg', 'wcnt', 'tagg']
        self.topics = {
            name: {'partitions': self.partitions,
                   'replication-factor': self.replication,
                   'configs': {"min.insync.replicas": 2}}
            for name in smoke_test_topics
        }
        self.topics['__consumer_offsets'] = {'partitions': 50,
                                             'replication-factor': self.replication,
                                             'configs': {"min.insync.replicas": 2}}

    def fail_broker_type(self, failure_mode, broker_type):
        """
        Pick a random topic and apply the given failure to one of its brokers.

        :param failure_mode: key into the module-level ``failures`` table
        :param broker_type: forwarded unchanged to the selected failure function
        """
        # dict.keys() is a view on Python 3 and cannot be indexed, so the keys
        # are materialized into a list before picking one.
        topic_names = list(self.topics.keys())
        topic = topic_names[randint(0, len(topic_names) - 1)]
        failures[failure_mode](self, topic, broker_type)

    def fail_many_brokers(self, failure_mode, num_failures):
        """
        Send a signal to the first brokers of the cluster.

        :param failure_mode: "clean_shutdown" sends SIGTERM, anything else SIGKILL
        :param num_failures: ``num_failures - 1`` brokers are signalled
        """
        # NOTE(review): every mode other than "clean_shutdown" (including
        # "clean_bounce") results in SIGKILL, and no restart is issued here --
        # confirm this is intended.
        sig = signal.SIGTERM if failure_mode == "clean_shutdown" else signal.SIGKILL

        # The upper bound is num_failures - 1, so one broker fewer than
        # requested is signalled; test_all_brokers_bounce relies on one broker
        # being left over.
        for num in range(0, num_failures - 1):
            signal_node(self, self.kafka.nodes[num], sig)

    def confirm_topics_on_all_brokers(self, expected_topic_set):
        """
        Check whether every broker lists all of the expected topics.

        :param expected_topic_set: set of topic names that must be present
        :return: True if each broker reports every expected topic, else False
        """
        for node in self.kafka.nodes:
            # kafka.list_topics() returns a python generator, so values are
            # fetched lazily and cannot be compared directly; count the
            # expected topics while iterating over what is returned.
            match_count = sum(1 for topic in self.kafka.list_topics(node=node)
                              if topic in expected_topic_set)

            if len(expected_topic_set) != match_count:
                return False

        return True

    def setup_system(self, start_processor=True, num_threads=3):
        """
        Start ZooKeeper, Kafka, the smoke test driver and (optionally) the processor.

        :param start_processor: when False the processor service is created but
            not started, so the caller can start it later
        :param num_threads: forwarded to StreamsSmokeTestJobRunnerService
        """
        # Setup phase
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()

        self.kafka = KafkaService(self.test_context, num_nodes=self.replication, zk=self.zk, topics=self.topics)
        self.kafka.start()

        # allow some time for topics to be created
        wait_until(lambda: self.confirm_topics_on_all_brokers(set(self.topics.keys())),
                   timeout_sec=60,
                   err_msg="Broker did not create all topics in 60 seconds ")

        # Start test harness
        self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka)
        self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka,
                                                           "at_least_once", num_threads)

        self.driver.start()

        if start_processor:
            self.processor1.start()

    def collect_results(self, sleep_time_secs):
        """
        Stop the driver and processor and gather result lines from their output.

        :param sleep_time_secs: 0 means the brokers were failed immediately, in
            which case the client is expected to have logged an exception
        :return: dict with the last matching line for "Client closed",
            "Records Delivered" and "Logic Success/Failure"
        """
        data = {}
        # End test
        self.driver.wait()
        self.driver.stop()

        self.processor1.stop()

        node = self.driver.node

        # Success is declared if streams does not crash when sleep time > 0
        # It should give an exception when sleep time is 0 since we kill the brokers immediately
        # and the topic manager cannot create internal topics with the desired replication factor
        if sleep_time_secs == 0:
            output_streams = self.processor1.node.account.ssh_capture(
                "grep SMOKE-TEST-CLIENT-EXCEPTION %s" % self.processor1.STDOUT_FILE, allow_fail=False)
        else:
            output_streams = self.processor1.node.account.ssh_capture(
                "grep SMOKE-TEST-CLIENT-CLOSED %s" % self.processor1.STDOUT_FILE, allow_fail=False)

        # Each loop below keeps only the last matching line.
        for line in output_streams:
            data["Client closed"] = line

        # Currently it is hard to guarantee anything about Kafka since we don't have exactly once.
        # With exactly once in place, success will be defined as ALL-RECORDS-DELIVERED and SUCCESS
        output = node.account.ssh_capture(
            "grep -E 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED|PROCESSED-LESS-THAN-GENERATED' %s"
            % self.driver.STDOUT_FILE, allow_fail=False)
        for line in output:
            data["Records Delivered"] = line
        output = node.account.ssh_capture("grep -E 'SUCCESS|FAILURE' %s" % self.driver.STDOUT_FILE, allow_fail=False)
        for line in output:
            data["Logic Success/Failure"] = line

        return data

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"],
            broker_type=["leader", "controller"],
            num_threads=[1, 3],
            sleep_time_secs=[120])
    def test_broker_type_bounce(self, failure_mode, broker_type, sleep_time_secs, num_threads):
        """
        Start a smoke test client, then kill one particular broker and ensure data is still received
        Record if records are delivered.

        We also add a single thread stream client to make sure we could get all partitions reassigned in
        next generation so to verify the partition lost is correctly triggered.
        """
        self.setup_system(num_threads=num_threads)

        # Sleep to allow test to run for a bit
        time.sleep(sleep_time_secs)

        # Fail brokers
        self.fail_broker_type(failure_mode, broker_type)

        return self.collect_results(sleep_time_secs)

    @ignore
    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown"],
            broker_type=["controller"],
            sleep_time_secs=[0])
    def test_broker_type_bounce_at_start(self, failure_mode, broker_type, sleep_time_secs):
        """
        Start a smoke test client, then kill one particular broker immediately before streams starts
        Streams should throw an exception since it cannot create topics with the desired
        replication factor of 3
        """
        self.setup_system(start_processor=False)

        # Sleep to allow test to run for a bit
        time.sleep(sleep_time_secs)

        # Fail brokers
        self.fail_broker_type(failure_mode, broker_type)

        self.processor1.start()

        return self.collect_results(sleep_time_secs)

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"],
            num_failures=[2])
    def test_many_brokers_bounce(self, failure_mode, num_failures):
        """
        Start a smoke test client, then kill a few brokers and ensure data is still received
        Record if records are delivered
        """
        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(120)

        # Fail brokers
        self.fail_many_brokers(failure_mode, num_failures)

        return self.collect_results(120)

    @cluster(num_nodes=7)
    @matrix(failure_mode=["clean_bounce", "hard_bounce"],
            num_failures=[3])
    def test_all_brokers_bounce(self, failure_mode, num_failures):
        """
        Start a smoke test client, then kill a few brokers and ensure data is still received
        Record if records are delivered
        """
        # Set min.insync.replicas to 1 because in the last stage of the test there is only one broker left.
        # Otherwise the last offset commit will never succeed and time out and potentially take longer as
        # duration passed to the close method of the Kafka Streams client.
        self.topics['__consumer_offsets'] = {'partitions': 50,
                                             'replication-factor': self.replication,
                                             'configs': {"min.insync.replicas": 1}}

        self.setup_system()

        # Sleep to allow test to run for a bit
        time.sleep(120)

        # Fail brokers
        self.fail_many_brokers(failure_mode, num_failures)

        return self.collect_results(120)