def ensure_all_critical_processes_running(duthost, containers_in_namespaces):
    """Checks whether each critical process is running and starts it if it is not running.

    The critical group and process lists are fetched once per container and are
    then applied to the instance of that container in every listed namespace.
    A failure to fetch the lists is reported through `pytest_assert`.

    Args:
        duthost: Hostname of DUT.
        containers_in_namespaces: A dictionary where keys are container names and
        values are lists which contains ids of namespaces this container should reside in.

    Returns:
        None.
    """
    for container_name, namespace_ids in containers_in_namespaces.items():
        # If a container is only running on host, then namespace_ids is [None]
        # If a container is running on multi-ASIC, then namespace_ids is [0, 1, ...]
        # If a container is running on host and multi-ASICs, then namespace_ids is [None, 0, 1, ...]
        # NOTE(review): presumably a container that lives only in ASIC namespaces has
        # no instance under its bare name, so the lists are read from a namespaced
        # instance whenever more than one namespace id is given - confirm.
        lookup_container_name = container_name
        if len(namespace_ids) >= 2:
            lookup_container_name += namespace_ids[1]

        critical_group_list, critical_process_list, succeeded = duthost.get_critical_group_and_process_lists(
            lookup_container_name)
        pytest_assert(
            succeeded,
            "Failed to get critical group and process lists of container '{}'".format(lookup_container_name))

        for namespace_id in namespace_ids:
            # The instance in the default namespace keeps the bare container name;
            # every other instance gets the namespace id appended.
            container_name_in_namespace = container_name
            if namespace_id != DEFAULT_ASIC_ID:
                container_name_in_namespace += namespace_id

            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove the following two lines once the issue was solved in the image.
                # A substring match is used so that namespaced instances ("syncd0",
                # "syncd1", ...) are skipped as well, not only the host "syncd".
                if "syncd" in container_name_in_namespace and critical_process == "dsserve":
                    continue

                ensure_process_is_running(duthost, container_name_in_namespace, critical_process)

            for critical_group in critical_group_list:
                group_program_info = get_group_program_info(duthost, container_name_in_namespace, critical_group)
                for program_name in group_program_info:
                    ensure_process_is_running(duthost, container_name_in_namespace, program_name)
# Example no. 2
# 0
def stop_critical_processes(duthost, containers_in_namespaces):
    """Stops the critical processes of every container in every namespace it resides in.

    For each container the critical group and process lists are fetched once
    (a failure is reported through `pytest_assert`). Then, for each namespace,
    every critical process and every program of every critical group is handed
    to `check_and_kill_process` together with its status and pid.

    Args:
        duthost: Hostname of DUT.
        containers_in_namespaces: A dictionary where keys are container names and
        values are lists which contains ids of namespaces this container should reside in.

    Returns:
        None.
    """
    for container_name, namespace_ids in containers_in_namespaces.items():
        # Shapes of namespace_ids:
        #   container only on host               -> [None]
        #   container on multi-ASIC              -> [0, 1, ...]
        #   container on host and on multi-ASICs -> [None, 0, 1, ...]
        # With two or more ids, the second one names the instance that is queried
        # for the critical lists.
        if len(namespace_ids) > 1:
            lookup_container = container_name + namespace_ids[1]
        else:
            lookup_container = container_name

        lookup_result = duthost.get_critical_group_and_process_lists(lookup_container)
        critical_group_list, critical_process_list, succeeded = lookup_result
        failure_message = "Failed to get critical group and process lists of container '{}'".format(
            lookup_container)
        pytest_assert(succeeded, failure_message)

        for namespace_id in namespace_ids:
            # Only instances outside the default namespace carry the id as a suffix.
            target_container = (container_name + namespace_id
                                if namespace_id != DEFAULT_ASIC_ID
                                else container_name)

            for process_name in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove this special case once the issue was solved in the image.
                is_unmanaged_dsserve = process_name == "dsserve" and "syncd" in target_container
                if not is_unmanaged_dsserve:
                    status, pid = get_program_info(duthost, target_container, process_name)
                    check_and_kill_process(duthost, target_container, process_name, status, pid)

            for group_name in critical_group_list:
                programs = get_group_program_info(duthost, target_container, group_name)
                for program_name in programs:
                    # Each entry is indexed as (status, pid); group members are
                    # addressed as "<group>:<program>".
                    program_entry = programs[program_name]
                    qualified_name = group_name + ":" + program_name
                    check_and_kill_process(duthost, target_container, qualified_name,
                                           program_entry[0], program_entry[1])
def get_expected_alerting_messages(duthost, containers_in_namespaces):
    """Generates the regex of expected alerting messages for the critical processes in each namespace.

    One regex of the form ".*Process '<name>' is not running in namespace '<ns>'.*"
    is produced per critical process and per program of each critical group, for
    every namespace the container resides in. The namespace is "host" for the
    default namespace and NAMESPACE_PREFIX followed by the namespace id otherwise.
    A failure to fetch the critical lists is reported through `pytest_assert`.

    Args:
        duthost: Hostname of DUT.
        containers_in_namespaces: A dictionary where keys are container names and
        values are lists which contains ids of namespaces this container should reside in.

    Returns:
        A list contains the regex of alerting messages.
    """
    expected_alerting_messages = []

    for container_name, namespace_ids in containers_in_namespaces.items():
        logger.info("Generating the expected alerting messages for container '{}'...".format(container_name))

        # If a container is only running on host, then namespace_ids is [None]
        # If a container is running on multi-ASIC, then namespace_ids is [0, 1, ...]
        # If a container is running on host and multi-ASICs, then namespace_ids is [None, 0, 1, ...]
        # NOTE(review): presumably a container that lives only in ASIC namespaces has
        # no instance under its bare name, so the lists are read from a namespaced
        # instance whenever more than one namespace id is given - confirm.
        lookup_container_name = container_name
        if len(namespace_ids) >= 2:
            lookup_container_name += namespace_ids[1]

        critical_group_list, critical_process_list, succeeded = duthost.get_critical_group_and_process_lists(
            lookup_container_name)
        pytest_assert(
            succeeded,
            "Failed to get critical group and process lists of container '{}'".format(lookup_container_name))

        for namespace_id in namespace_ids:
            namespace_name = "host"
            container_name_in_namespace = container_name
            if namespace_id != DEFAULT_ASIC_ID:
                namespace_name = NAMESPACE_PREFIX + namespace_id
                container_name_in_namespace += namespace_id

            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove the following two lines once the issue was solved in the image.
                if container_name == "syncd" and critical_process == "dsserve":
                    continue
                logger.info("Generating the expected alerting message for process '{}'".format(critical_process))
                expected_alerting_messages.append(
                    ".*Process '{}' is not running in namespace '{}'.*".format(critical_process, namespace_name))

            for critical_group in critical_group_list:
                # Query the instance that belongs to this namespace rather than the
                # bare container name.
                group_program_info = get_group_program_info(duthost, container_name_in_namespace, critical_group)
                for program_name in group_program_info:
                    logger.info("Generating the expected alerting message for process '{}'".format(program_name))
                    expected_alerting_messages.append(
                        ".*Process '{}' is not running in namespace '{}'.*".format(program_name, namespace_name))

        logger.info("Generating the expected alerting messages for container '{}' was done!".format(container_name))

    return expected_alerting_messages
# Example no. 4
# 0
def get_expected_alerting_messages_supervisor(duthost,
                                              containers_in_namespaces):
    """Generates the regex of expected alerting messages for the critical processes in each container.
    These alerting messages will be matched against those in syslog generated by Supervisord.

    One regex of the form ".*Process '<name>' is not running in namespace '<ns>'.*"
    is produced per critical process and per program of each critical group, for
    every namespace the container resides in. The namespace is "host" for the
    default namespace and NAMESPACE_PREFIX followed by the namespace id otherwise.
    A failure to fetch the critical lists is reported through `pytest_assert`.

    Args:
        duthost: Hostname of DUT.
        containers_in_namespaces: A dictionary where keys are container names and
        values are lists which contains ids of namespaces this container should reside in.

    Returns:
        A list contains the regex of alerting messages.
    """
    expected_alerting_messages = []

    logger.info("Generating the regex of expected alerting messages ...")
    for container_name, namespace_ids in containers_in_namespaces.items():
        lookup_container_name = container_name
        # If a container is only running on host, then namespace_ids is [None]
        # If a container is running on multi-ASIC, then namespace_ids is [0, 1, ...]
        # If a container is running on host and multi-ASICs, then namespace_ids is [None, 0, 1, ...]
        # Two ids are already enough to mean "spread over namespaces" (e.g. [0, 1]),
        # so the threshold is >= 2, matching stop_critical_processes.
        if len(namespace_ids) >= 2:
            lookup_container_name += namespace_ids[1]

        critical_group_list, critical_process_list, succeeded = duthost.get_critical_group_and_process_lists(
            lookup_container_name)
        pytest_assert(
            succeeded,
            "Failed to get critical group and process lists of container '{}'".
            format(lookup_container_name))

        for namespace_id in namespace_ids:
            # The instance in the default namespace keeps the bare container name
            # and is reported under namespace "host".
            namespace_name = "host"
            container_name_in_namespace = container_name
            if namespace_id != DEFAULT_ASIC_ID:
                namespace_name = NAMESPACE_PREFIX + namespace_id
                container_name_in_namespace += namespace_id

            logger.info(
                "Generating the regex of expected alerting messages for container '{}'..."
                .format(container_name_in_namespace))
            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove the following two lines once the issue was solved in the image.
                if "syncd" in container_name_in_namespace and critical_process == "dsserve":
                    continue
                logger.info(
                    "Generating the regex of expected alerting message for process '{}' in container '{}'"
                    .format(critical_process, container_name_in_namespace))
                expected_alerting_messages.append(
                    ".*Process '{}' is not running in namespace '{}'.*".format(
                        critical_process, namespace_name))

            for critical_group in critical_group_list:
                group_program_info = get_group_program_info(
                    duthost, container_name_in_namespace, critical_group)
                for program_name in group_program_info:
                    logger.info(
                        "Generating the regex of expected alerting message for process '{}' in container '{}'"
                        .format(program_name, container_name_in_namespace))
                    expected_alerting_messages.append(
                        ".*Process '{}' is not running in namespace '{}'.*".
                        format(program_name, namespace_name))

            logger.info(
                "Generating the regex of expected alerting messages for container '{}' was done!"
                .format(container_name_in_namespace))

    return expected_alerting_messages