예제 #1
0
 def node_termination_scenario(self, instance_kill_count, node, timeout):
     """Terminate the instance backing ``node`` and wait for the node to vanish.

     The injection is attempted ``instance_kill_count`` times. Each round
     resolves the instance ID through ``self.aws``, terminates that instance,
     waits for the termination to finish, and then polls
     ``kubecli.list_nodes()`` (at most ``timeout`` polls, one second apart)
     until ``node`` is no longer listed.

     Any exception raised along the way, including the node still being
     listed after polling, is logged and the process exits with status 1.
     """
     for _round in range(instance_kill_count):
         try:
             logging.info("Starting node_termination_scenario injection")
             instance_id = self.aws.get_instance_id(node)
             terminating_msg = "Terminating the node %s with instance ID: %s " % (
                 node, instance_id)
             logging.info(terminating_msg)
             self.aws.terminate_instances(instance_id)
             self.aws.wait_until_terminated(instance_id)
             # Keep polling while the node is still listed; stop early once
             # it has disappeared.
             for _poll in range(timeout):
                 if node in kubecli.list_nodes():
                     time.sleep(1)
                 else:
                     break
             # Final check after the polling budget is used up.
             if node in kubecli.list_nodes():
                 raise Exception("Node could not be terminated")
             terminated_msg = "Node with instance ID: %s has been terminated" % (
                 instance_id)
             logging.info(terminated_msg)
             logging.info(
                 "node_termination_scenario has been successfuly injected!")
         except Exception as err:
             failure_msg = (
                 "Failed to terminate node instance. Encountered following exception:"
                 " %s. Test Failed" % (err))
             logging.error(failure_msg)
             logging.error("node_termination_scenario injection failed!")
             sys.exit(1)
예제 #2
0
def skew_time(scenario):
    """Set a fixed date or time on the nodes or pods selected by ``scenario``.

    The command is ``date --set 00-01-01`` for the ``skew_date`` action and
    ``date --set 01:01:01`` for the ``skew_time`` action; any other action
    leaves the command as a bare ``date --set ``.

    Targets depend on ``scenario["object_type"]``:

    * contains ``"node"``: names come from ``object_name`` when set,
      otherwise from ``kubecli.list_nodes(label_selector)``; the command is
      run on each one through ``node_debug``.
    * contains ``"pod"``: pods come from ``object_name`` (``namespace`` must
      then be present, otherwise the process exits with status 1), else from
      ``kubecli.get_all_pods(label_selector)``, else from
      ``kubecli.list_pods(namespace)``; the command is run through
      ``pod_exec``.

    Returns:
        ``("node", node_names)`` or ``("pod", pod_names)``, or ``None`` when
        ``object_type`` matches neither kind.
    """
    action = scenario['action']
    if action == "skew_date":
        new_value = "00-01-01"
    elif action == "skew_time":
        new_value = "01:01:01"
    else:
        new_value = ""
    skew_command = "date --set " + new_value

    target_type = scenario["object_type"]

    if "node" in target_type:
        if scenario.get('object_name'):
            node_names = scenario['object_name']
        elif scenario.get('label_selector'):
            node_names = kubecli.list_nodes(scenario['label_selector'])
        else:
            node_names = []

        for node_name in node_names:
            node_debug(node_name, skew_command)
            logging.info("Reset date/time on node " + str(node_name))
        return "node", node_names

    if "pod" in target_type:
        pod_names = []
        if scenario.get('object_name'):
            for given_name in scenario['object_name']:
                # Explicit pod names are only meaningful within a namespace.
                if "namespace" not in scenario:
                    logging.error("Need to set namespace when using pod name")
                    sys.exit(1)
                pod_names.append([given_name, scenario['namespace']])
        elif scenario.get('label_selector'):
            pod_names = kubecli.get_all_pods(scenario['label_selector'])
        elif scenario.get('namespace'):
            # Pair every listed pod with the namespace it was listed from.
            pod_names = [
                [listed_name, scenario['namespace']]
                for listed_name in kubecli.list_pods(scenario['namespace'])
            ]

        for entry in pod_names:
            if len(entry) > 1:
                # Entry carries its own namespace at index 1.
                pod_exec(entry[0], skew_command, entry[1])
            else:
                pod_exec(entry, skew_command, scenario['namespace'])
            logging.info("Reset date/time on pod " + str(entry[0]))
        return "pod", pod_names
예제 #3
0
def skew_time(scenario):
    """Set a fixed date or time on the nodes or pod containers in ``scenario``.

    The command is ``date --set 00-01-01`` for the ``skew_date`` action and
    ``date --set 01:01:01`` for the ``skew_time`` action; any other action
    leaves the command as a bare ``date --set ``.

    Targets depend on ``scenario["object_type"]``:

    * contains ``"node"``: names come from ``object_name`` when set,
      otherwise from ``kubecli.list_nodes(label_selector)``; the command is
      run on each one through ``node_debug``.
    * contains ``"pod"``: pods come from ``object_name`` (``namespace`` must
      then be present), else from ``kubecli.list_pods`` for ``namespace``
      (optionally narrowed by ``label_selector``), else from
      ``kubecli.get_all_pods(label_selector)``. For each pod a container is
      picked with ``get_container_name`` (seeded by the optional
      ``container_name`` key), the command is run through ``pod_exec``, and
      the chosen container name is appended to that pod's entry.

    Returns:
        ``("node", node_names)`` or ``("pod", pod_names)``, or ``None`` when
        ``object_type`` matches neither kind.

    Exits the process with status 1 when a pod name is given without a
    namespace, when no pods are found, or when ``pod_exec`` returns ``False``.
    """
    action = scenario["action"]
    skew_command = "date --set "
    if action == "skew_date":
        skew_command += "00-01-01"
    elif action == "skew_time":
        skew_command += "01:01:01"

    object_type = scenario["object_type"]

    if "node" in object_type:
        node_names = []
        if scenario.get("object_name"):
            node_names = scenario["object_name"]
        elif scenario.get("label_selector"):
            node_names = kubecli.list_nodes(scenario["label_selector"])

        for node in node_names:
            node_debug(node, skew_command)
            logging.info("Reset date/time on node " + str(node))
        return "node", node_names

    if "pod" not in object_type:
        return None

    container_name = scenario.get("container_name", "")
    pod_names = []
    if scenario.get("object_name"):
        for name in scenario["object_name"]:
            # Explicit pod names are only meaningful within a namespace.
            if "namespace" not in scenario:
                logging.error("Need to set namespace when using pod name")
                sys.exit(1)
            pod_names.append([name, scenario["namespace"]])
    elif scenario.get("namespace"):
        namespace = scenario["namespace"]
        if "label_selector" not in scenario:
            logging.info(
                "label_selector key not found, querying for all the pods in namespace: %s",
                namespace,
            )
            listed = kubecli.list_pods(namespace)
        else:
            logging.info(
                "Querying for the pods matching the %s label_selector in namespace %s",
                scenario["label_selector"],
                namespace,
            )
            listed = kubecli.list_pods(namespace, scenario["label_selector"])
        # Pair every listed pod with the namespace it was listed from.
        pod_names = [[pod_name, namespace] for pod_name in listed]
    elif scenario.get("label_selector"):
        pod_names = kubecli.get_all_pods(scenario["label_selector"])

    if not pod_names:
        # Fatal condition: report it at error level like the other exits.
        logging.error("Cannot find pods matching the namespace/label_selector, please check")
        sys.exit(1)

    for pod in pod_names:
        if len(pod) > 1:
            # Entry carries its own namespace at index 1.
            pod_name, namespace = pod[0], pod[1]
        else:
            pod_name, namespace = pod, scenario["namespace"]

        selected_container_name = get_container_name(pod_name, namespace, container_name)
        pod_exec_response = pod_exec(pod_name, skew_command, namespace, selected_container_name)
        if pod_exec_response is False:
            logging.error(
                "Couldn't reset time on container %s in pod %s in namespace %s",
                selected_container_name,
                pod_name,
                namespace,
            )
            sys.exit(1)
        # Record which container was touched alongside the pod entry.
        pod.append(selected_container_name)
        logging.info("Reset date/time on pod " + str(pod[0]))
    return "pod", pod_names
예제 #4
0
def _wait_for_nodes(wait_fn, node_ids, timeout):
    """Call ``wait_fn`` for every id until it has returned truthy for each.

    Ids for which ``wait_fn`` returns a falsy value are retried on the next
    pass; the function only returns once no id is left pending.
    """
    pending = set(node_ids)
    while pending:
        finished = set()
        for node in pending:
            if isinstance(node, tuple):
                # Tuple ids are unpacked with their two elements swapped.
                # NOTE(review): element meaning is provider-specific - confirm.
                done = wait_fn(node[1], node[0], timeout)
            else:
                done = wait_fn(node, timeout)
            if done:
                finished.add(node)
        pending -= finished


def cluster_shut_down(shut_down_config):
    """Stop every cluster node, keep it down for a while, then start it again.

    Reads ``runs``, ``shut_down_duration``, ``cloud_type`` and ``timeout``
    from ``shut_down_config``. ``cloud_type`` (case-insensitive) selects the
    provider object: ``aws``, ``gcp``, ``openstack``, or ``azure``/``az``.
    Any other value is logged as an error and the process exits with status 1.

    For each run, all instances are stopped via ``multiprocess_nodes``, the
    function waits until each one reports stopped, sleeps for
    ``shut_down_duration``, starts all instances again, waits until each one
    reports running, and finally sleeps 150 seconds so cluster components
    can initialize.
    """
    runs = shut_down_config["runs"]
    shut_down_duration = shut_down_config["shut_down_duration"]
    cloud_type = shut_down_config["cloud_type"]
    timeout = shut_down_config["timeout"]

    provider = cloud_type.lower()
    if provider == "aws":
        cloud_object = AWS()
    elif provider == "gcp":
        cloud_object = GCP()
    elif provider == "openstack":
        cloud_object = OPENSTACKCLOUD()
    elif provider in ("azure", "az"):
        cloud_object = Azure()
    else:
        logging.error("Cloud type " + cloud_type +
                      " is not currently supported for cluster shut down")
        sys.exit(1)

    # Resolve the instance ids once; they are reused for every run.
    node_id = [cloud_object.get_instance_id(node) for node in kubecli.list_nodes()]
    logging.info("node id list " + str(node_id))

    for _ in range(runs):
        logging.info("Starting cluster_shut_down scenario injection")
        multiprocess_nodes(cloud_object.stop_instances, node_id)
        _wait_for_nodes(cloud_object.wait_until_stopped, node_id, timeout)

        logging.info(
            "Shutting down the cluster for the specified duration: %s",
            shut_down_duration)
        time.sleep(shut_down_duration)

        logging.info("Restarting the nodes")
        multiprocess_nodes(cloud_object.start_instances, node_id)
        logging.info("Wait for each node to be running again")
        _wait_for_nodes(cloud_object.wait_until_running, node_id, timeout)

        logging.info(
            "Waiting for 150s to allow cluster component initialization")
        time.sleep(150)

        logging.info("Successfully injected cluster_shut_down scenario!")