def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration): try: # Loop to run the scenarios starts here for pod_scenario in scenarios_list: if len(pod_scenario) > 1: pre_action_output = post_actions.run(kubeconfig_path, pod_scenario[1]) else: pre_action_output = "" scenario_logs = runcommand.invoke( "powerfulseal autonomous --use-pod-delete-instead-" "of-ssh-kill --policy-file %s --kubeconfig %s " "--no-cloud --inventory-kubernetes --headless" % (pod_scenario[0], kubeconfig_path)) # Display pod scenario logs/actions print(scenario_logs) logging.info("Scenario: %s has been successfully injected!" % (pod_scenario[0])) logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) failed_post_scenarios = post_actions.check_recovery( kubeconfig_path, pod_scenario, failed_post_scenarios, pre_action_output) cerberus.publish_kraken_status(config, failed_post_scenarios) except Exception as e: logging.error("Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e)) return failed_post_scenarios
def container_run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration): for container_scenario_config in scenarios_list: if len(container_scenario_config) > 1: pre_action_output = post_actions.run(kubeconfig_path, container_scenario_config[1]) else: pre_action_output = "" with open(container_scenario_config[0], "r") as f: cont_scenario_config = yaml.full_load(f) for cont_scenario in cont_scenario_config["scenarios"]: # capture start time start_time = int(time.time()) killed_containers = container_killing_in_pod(cont_scenario) if len(container_scenario_config) > 1: try: failed_post_scenarios = post_actions.check_recovery( kubeconfig_path, container_scenario_config, failed_post_scenarios, pre_action_output ) except Exception as e: logging.error("Failed to run post action checks: %s" % e) sys.exit(1) else: failed_post_scenarios = check_failed_containers( killed_containers, cont_scenario.get("retry_wait", 120) ) logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) # capture end time end_time = int(time.time()) # publish cerberus status cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time) logging.info("")
def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration): # Loop to run the scenarios starts here for pod_scenario in scenarios_list: if len(pod_scenario) > 1: pre_action_output = post_actions.run(kubeconfig_path, pod_scenario[1]) else: pre_action_output = "" try: # capture start time start_time = int(time.time()) input = serialization.load_from_file(pod_scenario) s = pod_plugin.get_schema() input_data: pod_plugin.KillPodConfig = s.unserialize_input( "pod", input) if kubeconfig_path is not None: input_data.kubeconfig_path = kubeconfig_path output_id, output_data = s.call_step("pod", input_data) if output_id == "error": data: pod_plugin.PodErrorOutput = output_data logging.error("Failed to run pod scenario: {}".format( data.error)) else: data: pod_plugin.PodSuccessOutput = output_data for pod in data.pods: print("Deleted pod {} in namespace {}\n".format( pod.pod_name, pod.pod_namespace)) except Exception as e: logging.error( "Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e)) sys.exit(1) logging.info("Scenario: %s has been successfully injected!" % (pod_scenario[0])) logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) try: failed_post_scenarios = post_actions.check_recovery( kubeconfig_path, pod_scenario, failed_post_scenarios, pre_action_output) except Exception as e: logging.error("Failed to run post action checks: %s" % e) sys.exit(1) # capture end time end_time = int(time.time()) # publish cerberus status cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time) return failed_post_scenarios
def run(scenarios_list, config, wait_duration, failed_post_scenarios, kubeconfig_path): for scenario_config in scenarios_list: if len(scenario_config) > 1: pre_action_output = post_actions.run(kubeconfig_path, scenario_config[1]) else: pre_action_output = "" with open(scenario_config[0], "r") as f: scenario_config_yaml = yaml.full_load(f) for scenario in scenario_config_yaml["scenarios"]: scenario_namespace = scenario.get("namespace", "^.*$") scenario_label = scenario.get("label_selector", None) run_count = scenario.get("runs", 1) namespace_action = scenario.get("action", "delete") run_sleep = scenario.get("sleep", 10) wait_time = scenario.get("wait_time", 30) killed_namespaces = [] namespaces = kubecli.check_namespaces([scenario_namespace], scenario_label) start_time = int(time.time()) for i in range(run_count): if len(namespaces) == 0: logging.error( "Couldn't %s %s namespaces, not enough namespaces matching %s with label %s" % (namespace_action, str(run_count), scenario_namespace, str(scenario_label)) ) sys.exit(1) selected_namespace = namespaces[random.randint(0, len(namespaces) - 1)] killed_namespaces.append(selected_namespace) try: runcommand.invoke("oc %s project %s" % (namespace_action, selected_namespace)) logging.info(namespace_action + " on namespace " + str(selected_namespace) + " was successful") except Exception as e: logging.info( namespace_action + " on namespace " + str(selected_namespace) + " was unsuccessful" ) logging.info("Namespace action error: " + str(e)) sys.exit(1) namespaces.remove(selected_namespace) logging.info("Waiting %s seconds between namespace deletions" % str(run_sleep)) time.sleep(run_sleep) logging.info("Waiting for the specified duration: %s" % wait_duration) time.sleep(wait_duration) if len(scenario_config) > 1: try: failed_post_scenarios = post_actions.check_recovery( kubeconfig_path, scenario_config, failed_post_scenarios, pre_action_output ) except Exception as e: logging.error("Failed to run post action checks: %s" % e) sys.exit(1) else: failed_post_scenarios = check_active_namespace(killed_namespaces, wait_time) end_time = int(time.time()) cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time)
def run(scenarios_list, config, wait_duration): failed_post_scenarios = [] for shut_down_config in scenarios_list: if len(shut_down_config) > 1: pre_action_output = post_actions.run("", shut_down_config[1]) else: pre_action_output = "" with open(shut_down_config[0], "r") as f: shut_down_config_yaml = yaml.full_load(f) shut_down_config_scenario = shut_down_config_yaml[ "cluster_shut_down_scenario"] cluster_shut_down(shut_down_config_scenario) logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) failed_post_scenarios = post_actions.check_recovery( "", shut_down_config, failed_post_scenarios, pre_action_output) cerberus.publish_kraken_status(config, failed_post_scenarios)
def run(scenarios_list, config, wait_duration, failed_post_scenarios, kubeconfig_path): for scenario_config in scenarios_list: if len(scenario_config) > 1: pre_action_output = post_actions.run(kubeconfig_path, scenario_config[1]) else: pre_action_output = "" with open(scenario_config[0], "r") as f: scenario_config_yaml = yaml.full_load(f) for scenario in scenario_config_yaml["scenarios"]: scenario_namespace = scenario.get("namespace", "") scenario_label = scenario.get("label_selector", "") if scenario_namespace is not None and scenario_namespace.strip( ) != "": if scenario_label is not None and scenario_label.strip( ) != "": logging.error( "You can only have namespace or label set in your namespace scenario" ) logging.error( "Current scenario config has namespace '%s' and label selector '%s'" % (scenario_namespace, scenario_label)) logging.error( "Please set either namespace to blank ('') or label_selector to blank ('') to continue" ) sys.exit(1) delete_count = scenario.get("delete_count", 1) run_count = scenario.get("runs", 1) run_sleep = scenario.get("sleep", 10) wait_time = scenario.get("wait_time", 30) killed_namespaces = [] start_time = int(time.time()) for i in range(run_count): namespaces = kubecli.check_namespaces([scenario_namespace], scenario_label) for j in range(delete_count): if len(namespaces) == 0: logging.error( "Couldn't delete %s namespaces, not enough namespaces matching %s with label %s" % (str(run_count), scenario_namespace, str(scenario_label))) sys.exit(1) selected_namespace = namespaces[random.randint( 0, len(namespaces) - 1)] killed_namespaces.append(selected_namespace) try: kubecli.delete_namespace(selected_namespace) logging.info( "Delete on namespace %s was successful" % str(selected_namespace)) except Exception as e: logging.info( "Delete on namespace %s was unsuccessful" % str(selected_namespace)) logging.info("Namespace action error: " + str(e)) sys.exit(1) namespaces.remove(selected_namespace) logging.info( "Waiting %s seconds between namespace deletions" % str(run_sleep)) time.sleep(run_sleep) logging.info("Waiting for the specified duration: %s" % wait_duration) time.sleep(wait_duration) if len(scenario_config) > 1: try: failed_post_scenarios = post_actions.check_recovery( kubeconfig_path, scenario_config, failed_post_scenarios, pre_action_output) except Exception as e: logging.error( "Failed to run post action checks: %s" % e) sys.exit(1) else: failed_post_scenarios = check_active_namespace( killed_namespaces, wait_time) end_time = int(time.time()) cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time)