def teardown(self):
    """Remove the cached chaos object and stop every health checker.

    The chaos object is looked up from ``self._chaos_config`` (cached by
    ``test_chaos``). Checker termination runs in a ``finally`` clause so the
    monitor threads are stopped even when the chaos clean-up raises or the
    test failed before a chaos config was cached.
    """
    try:
        # test_chaos starts the monitor threads before it caches the config,
        # so the attribute may be missing or empty when the test failed early.
        chaos_config = getattr(self, '_chaos_config', None)
        if chaos_config:
            # ``or {}`` treats a missing/empty ``metadata`` section like a
            # missing name instead of raising AttributeError on None.
            metadata = chaos_config.get('metadata') or {}
            chaos_opt = ChaosOpt(chaos_config['kind'])
            # raise_ex=False: presumably best-effort deletion -- confirm
            # against ChaosOpt.delete_chaos_object.
            chaos_opt.delete_chaos_object(metadata.get('name'), raise_ex=False)
        else:
            log.debug("tear down: no chaos config cached, skip deleting chaos object")
    finally:
        for k, ch in self.health_checkers.items():
            ch.terminate()
            log.debug(f"tear down: checker {k} terminated")
        # Pause before reporting whether each checker thread is still alive.
        sleep(2)
        for k, t in self.checker_threads.items():
            log.debug(f"Thread {k} is_alive(): {t.is_alive()}")
def reboot_pod(chaos_yaml):
    """Inject the chaos experiment described by ``chaos_yaml``, then delete it.

    The experiment is created, kept for one second, and deleted again. The
    deletion runs in a ``finally`` clause so the chaos object is not left
    behind if the wait is interrupted.

    Args:
        chaos_yaml: chaos experiment definition passed to
            ``gen_experiment_config``.
    """
    # parse chaos object
    chaos_config = gen_experiment_config(chaos_yaml)
    log.debug(chaos_config)
    # Resolve the object name before injecting anything, so a config without
    # a ``metadata`` section cannot fail between creation and deletion.
    # A missing section is treated the same as a missing name (None).
    meta_name = (chaos_config.get('metadata') or {}).get('name')
    # inject chaos
    chaos_opt = ChaosOpt(chaos_config['kind'])
    chaos_opt.create_chaos_object(chaos_config)
    try:
        log.debug("chaos injected")
        sleep(1)
    finally:
        # delete chaos, even when the wait above was interrupted
        chaos_opt.delete_chaos_object(meta_name)
    log.debug("chaos deleted")
def test_chaos(self, chaos_yaml):
    """Run one chaos scenario and check operation statistics around it.

    Phases, in order:
      1. start the monitor threads and assert the statistics with the default
         expectations before any chaos is applied;
      2. create the chaos object described by ``chaos_yaml`` and assert the
         statistics against the per-operation ``self.expect_*`` values
         (presumably populated by ``parser_testcase_config`` -- confirm);
      3. delete the chaos object, reconnect, and assert the statistics with
         the default expectations again.

    Args:
        chaos_yaml: chaos experiment definition passed to
            ``gen_experiment_config`` and ``parser_testcase_config``.
    """

    def report_threads(prefix=""):
        # Log the liveness of every monitor thread.
        for name, thread in self.checker_threads.items():
            log.debug(f"{prefix}Thread {name} is_alive(): {thread.is_alive()}")

    # --- phase 0: start monitoring and load the scenario -------------------
    log.debug("*********************Chaos Test Start**********************")
    log.debug(connections.get_connection_addr('default'))
    self.checker_threads = start_monitor_threads(self.health_checkers)

    # Keep the parsed config on the instance so tear down can find it.
    self._chaos_config = experiment = gen_experiment_config(chaos_yaml)
    log.debug(experiment)

    # Load the expectations for this scenario from testcases.yaml.
    parsed = self.parser_testcase_config(chaos_yaml)
    if parsed is False:
        log.error("Fail to get the testcase info in testcases.yaml")
        assert False

    # --- phase 1: baseline before chaos -------------------------------------
    # Let the checkers run for 2 * WAIT_PER_OP before the first check.
    sleep(constants.WAIT_PER_OP * 2)
    log.debug("******1st assert before chaos: ")
    assert_statistic(self.health_checkers)

    # --- phase 2: chaos applied ---------------------------------------------
    injector = ChaosOpt(experiment['kind'])
    injector.create_chaos_object(experiment)
    log.debug("chaos injected")
    sleep(constants.WAIT_PER_OP * 2.1)
    # Restart the counters, then collect statistics for 4 * WAIT_PER_OP.
    reset_counting(self.health_checkers)
    sleep(constants.WAIT_PER_OP * 4)
    report_threads("10s later: ")

    log.debug("******2nd assert after chaos injected: ")
    expected = {
        Op.create: self.expect_create,
        Op.insert: self.expect_insert,
        Op.flush: self.expect_flush,
        Op.index: self.expect_index,
        Op.search: self.expect_search,
        Op.query: self.expect_query,
    }
    assert_statistic(self.health_checkers, expectations=expected)

    # --- phase 3: chaos removed ---------------------------------------------
    chaos_name = experiment.get('metadata').get('name')
    injector.delete_chaos_object(chaos_name)
    log.debug("chaos deleted")
    report_threads()
    sleep(2)

    # Wait a further 2 * WAIT_PER_OP, then reconnect if needed.
    sleep(constants.WAIT_PER_OP * 2)
    reconnect(connections, self.host, self.port)
    # Restart the counters and collect statistics for 5 * WAIT_PER_OP.
    reset_counting(self.health_checkers)
    sleep(constants.WAIT_PER_OP * 5)

    log.debug("******3rd assert after chaos deleted: ")
    assert_statistic(self.health_checkers)
    # Final check of all recorded expectations.
    assert_expectations()
    log.debug("*********************Chaos Test Completed**********************")