def test_container_checker(duthosts, enum_dut_feature_container,
                           rand_selected_dut, tbinfo):
    """Tests the feature of container checker.

    This function will check whether the container names will appear in the Monit
    alerting message if they are stopped explicitly or they hit start limitation.

    Args:
        duthosts: Collection of DuTs, indexed by DuT name.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        rand_selected_dut: The fixture returns a randomly selected DuT.
        tbinfo: Testbed information; `tbinfo["topo"]["type"]` is read.

    Returns:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(
        enum_dut_feature_container)

    # The two skip conditions are checked separately so that the reported
    # skip reason always matches the condition which actually failed.
    pytest_require(
        dut_name == rand_selected_dut.hostname,
        "Skips testing container_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
        .format(container_name, dut_name, rand_selected_dut.hostname))
    pytest_require(
        container_name != "unknown",
        "Skips testing container_checker on the DuT '{}' since the container name is unknown."
        .format(dut_name))
    duthost = duthosts[dut_name]

    loganalyzer = LogAnalyzer(
        ansible_host=duthost,
        marker_prefix="container_checker_{}".format(container_name))

    disabled_containers = get_disabled_container_list(duthost)

    # Work on a copy so the list returned by the helper is never mutated.
    skip_containers = list(disabled_containers)
    skip_containers.extend(["gbsyncd", "database", "database-chassis"])

    # Skip 'radv' container on devices whose role is not T0.
    if tbinfo["topo"]["type"] != "t0":
        skip_containers.append("radv")

    pytest_require(
        container_name not in skip_containers,
        "Container '{}' is skipped for testing.".format(container_name))
    stop_container(duthost, container_name)

    loganalyzer.expect_regex = get_expected_alerting_message(container_name)
    # NOTE(review): presumably LogAnalyzer places its marker on entry and
    # checks `expect_regex` on exit -- confirm against LogAnalyzer.
    with loganalyzer:
        # Wait slightly longer than one minute (70 seconds) such that Monit has
        # a chance to write alerting message into syslog.
        logger.info("Sleep 1 minutes to wait for the alerting message...")
        time.sleep(70)
def test_monit_new_syntax(duthosts, enum_dut_feature_container, test_setup_and_cleanup,
                          enum_rand_one_per_hwsku_frontend_hostname):
    """Checks that the new syntax of Monit mitigates the issue in which Monit was unable
    to restart a container because it failed to reset its internal counter. With this syntax,
    Monit is expected to restart the culprit container if its memory usage stays above
    the threshold for a specific number of times continuously.

    Args:
        duthosts: The fixture returns list of DuTs.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        test_setup_and_cleanup: Fixture to setup prerequisites before and after testing.
        enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up
          a frontend DuT from testbed.

    Returns:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container)

    # Only run on the DuT which was picked for this parametrization.
    runs_on_chosen_dut = dut_name == enum_rand_one_per_hwsku_frontend_hostname
    pytest_require(
        runs_on_chosen_dut,
        "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
        .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname))

    # Only the 'telemetry' container is exercised.
    is_telemetry = container_name == "telemetry"
    pytest_require(
        is_telemetry,
        "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'."
        .format(container_name))

    duthost = duthosts[dut_name]

    # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB
    # and number of vm_workers is hard coded. We will extend this testing on all containers after
    # the feature 'memory_checker' is fully implemented.
    container_name = "telemetry"
    vm_workers = 6

    # Kept as one short-circuiting expression so that the DuT attributes are
    # read in the same order, and only when needed.
    is_supported = (
        "Celestica-E1031" not in duthost.facts["hwsku"]
        and (
            ("20191130" in duthost.os_version
             and parse_version(duthost.os_version) > parse_version("20191130.72"))
            or parse_version(duthost.kernel_version) > parse_version("4.9.0")
        )
    )
    pytest_require(
        is_supported,
        "Test is not supported for platform Celestica E1031, 20191130.72 and older image versions!")

    logger.info("Checks whether '{}' is running ...".format(container_name))
    container_up = wait_until(
        CONTAINER_RESTART_THRESHOLD_SECS,
        CONTAINER_CHECK_INTERVAL_SECS,
        0,
        check_container_state, duthost, container_name, True)
    pytest_assert(container_up, "'{}' is not running on DuT!".format(container_name))
    logger.info("'{}' is running on DuT!".format(container_name))

    consumes_memory_and_checks_monit(duthost, container_name, vm_workers, True)
def test_monit_reset_counter_failure(duthosts, enum_dut_feature_container, test_setup_and_cleanup,
                                     enum_rand_one_per_hwsku_frontend_hostname):
    """Checks that Monit was unable to reset its counter. Specifically, Monit will restart
    the container if its memory usage is larger than the threshold for a specific number of
    times within a sliding window. However, Monit was unable to restart the container anymore
    if memory usage stayed above the threshold continuously, since Monit failed to reset its
    internal counter. The `stress` utility is leveraged as the memory stressing tool.

    Args:
        duthosts: The fixture returns list of DuTs.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        test_setup_and_cleanup: Fixture to setup prerequisites before and after testing.
        enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up
          a frontend DuT from testbed.

    Returns:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container)

    # Only run on the DuT which was picked for this parametrization.
    runs_on_chosen_dut = dut_name == enum_rand_one_per_hwsku_frontend_hostname
    pytest_require(
        runs_on_chosen_dut,
        "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
        .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname))

    # Only the 'telemetry' container is exercised.
    is_telemetry = container_name == "telemetry"
    pytest_require(
        is_telemetry,
        "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'."
        .format(container_name))

    duthost = duthosts[dut_name]

    # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB
    # and number of vm_workers is hard coded. We will extend this testing on all containers after
    # the feature 'memory_checker' is fully implemented.
    container_name = "telemetry"
    vm_workers = 6

    # Kept as one short-circuiting expression so that the DuT attributes are
    # read in the same order, and only when needed.
    is_supported = (
        "Celestica-E1031" not in duthost.facts["hwsku"]
        and (
            "20201231" in duthost.os_version
            or parse_version(duthost.kernel_version) > parse_version("4.9.0")
        )
    )
    pytest_require(
        is_supported,
        "Test is not supported for platform Celestica E1031, 20191130 and older image versions!")

    logger.info("Checks whether '{}' is running ...".format(container_name))
    container_up = wait_until(
        CONTAINER_RESTART_THRESHOLD_SECS,
        CONTAINER_CHECK_INTERVAL_SECS,
        0,
        check_container_state, duthost, container_name, True)
    pytest_assert(container_up, "'{}' is not running on DuT!".format(container_name))
    logger.info("'{}' is running on DuT!".format(container_name))

    consumes_memory_and_checks_monit(duthost, container_name, vm_workers, False)
def test_memory_checker(duthosts, enum_dut_feature_container, creds, enum_rand_one_per_hwsku_frontend_hostname):
    """Checks whether the telemetry container can be restarted or not if the memory
    usage of it is beyond the threshold. The `stress` utility is leveraged as
    the memory stressing tool.

    Args:
        duthosts: The fixture returns list of DuTs.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        creds: Passed through to `install_stress_utility`.
        enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up
          a frontend DuT from testbed.

    Returns:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container)

    # The two skip conditions are checked separately so that the reported
    # skip reason always matches the condition which actually failed.
    pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname,
                   "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
                   .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname))
    pytest_require(container_name == "telemetry",
                   "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'."
                   .format(container_name))
    duthost = duthosts[dut_name]

    # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB
    # and number of vm_workers is hard coded. We will extend this testing on all containers after
    # the feature 'memory_checker' is fully implemented.
    container_name = "telemetry"
    vm_workers = 4

    pytest_require("Celestica-E1031" not in duthost.facts["hwsku"]
                   and (("20191130" in duthost.os_version and parse_version(duthost.os_version) > parse_version("20191130.72"))
                   or parse_version(duthost.kernel_version) > parse_version("4.9.0")),
                   "Test is not supported for platform Celestica E1031, 20191130.72 and older image versions!")

    # Log lines which are expected to show up when the container is restarted.
    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="container_restart_due_to_memory")
    loganalyzer.expect_regex = [
        ".*restart_service.*Restarting service 'telemetry'.*",
        ".*Stopping Telemetry container.*",
        ".*Stopped Telemetry container.*",
    ]
    marker = loganalyzer.init()

    install_stress_utility(duthost, creds, container_name)
    try:
        consume_memory_and_restart_container(duthost, container_name, vm_workers, loganalyzer, marker)
    finally:
        # Always uninstall the stress utility, even if the memory stressing
        # step fails, so that it is not left behind in the container.
        remove_stress_utility(duthost, container_name)

    postcheck_critical_processes(duthost, container_name)
def modify_monit_config_and_restart(duthosts, enum_dut_feature_container, enum_rand_one_per_hwsku_frontend_hostname):
    """Customize the Monit configuration around a test and restore it afterwards.

    Before yielding, `/etc/monit/monitrc` and `/etc/monit/conf.d/monit_telemetry`
    are copied to `/tmp/`, the first character of the last line of
    `monit_telemetry` is replaced with `#`, a new rule is appended to that file,
    `set daemon 60` is rewritten to `set daemon 10` and the first character of
    the line(s) containing `with start delay 300` in `monitrc` is replaced with
    `#`. The Monit service is then restarted.

    After yielding, the backed up files are moved back into place, the Monit
    service is restarted and `config bgp startup all` is issued.

    NOTE(review): this generator is presumably registered as a pytest fixture
    by a decorator which is not part of this block -- confirm.

    Args:
        duthosts: The fixture returns list of DuTs.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up
          a frontend DuT from testbed.

    Yields:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container)

    is_target = dut_name == enum_rand_one_per_hwsku_frontend_hostname and container_name == "telemetry"
    pytest_require(
        is_target,
        "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
        .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname))
    duthost = duthosts[dut_name]

    logger.info("Back up Monit configuration files on DuT '{}' ...".format(duthost.hostname))
    backup_cmds = (
        "sudo cp -f /etc/monit/monitrc /tmp/",
        "sudo cp -f /etc/monit/conf.d/monit_telemetry /tmp/",
    )
    for cmd in backup_cmds:
        duthost.shell(cmd)

    # Rule which is appended in place of the last line of 'monit_telemetry'.
    replacement_rule = '    if status == 3 for 5 times within 10 cycles then exec "/usr/bin/restart_service telemetry"'
    logger.info("Modifying Monit config to eliminate start delay and decrease interval ...")
    customize_cmds = (
        "sudo sed -i '$s/^./#/' /etc/monit/conf.d/monit_telemetry",
        "echo '{}' | sudo tee -a /etc/monit/conf.d/monit_telemetry".format(replacement_rule),
        "sudo sed -i 's/set daemon 60/set daemon 10/' /etc/monit/monitrc",
        "sudo sed -i '/with start delay 300/s/^./#/' /etc/monit/monitrc",
    )
    for cmd in customize_cmds:
        duthost.shell(cmd)

    logger.info("Restart Monit service ...")
    duthost.shell("sudo systemctl restart monit")

    yield

    logger.info("Restore original Monit configuration files on DuT '{}' ...".format(duthost.hostname))
    restore_cmds = (
        "sudo mv -f /tmp/monitrc /etc/monit/",
        "sudo mv -f /tmp/monit_telemetry /etc/monit/conf.d/",
    )
    for cmd in restore_cmds:
        duthost.shell(cmd)

    logger.info("Restart Monit service ...")
    duthost.shell("sudo systemctl restart monit")

    logger.info("Restore bgp neighbours ...")
    duthost.shell("config bgp startup all")
def test_memory_checker(duthosts, enum_dut_feature_container, test_setup_and_cleanup,
                        enum_rand_one_per_hwsku_frontend_hostname):
    """Checks whether the container can be restarted or not if the memory
    usage of it is beyond its threshold for specific times within a sliding window.
    The `stress` utility is leveraged as the memory stressing tool.

    Args:
        duthosts: The fixture returns list of DuTs.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        test_setup_and_cleanup: Fixture to setup prerequisites before and after testing.
        enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up
          a frontend DuT from testbed.

    Returns:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container)
    pytest_require(dut_name == enum_rand_one_per_hwsku_frontend_hostname,
                   "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
                   .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname))

    pytest_require(container_name == "telemetry",
                   "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'."
                   .format(container_name))

    duthost = duthosts[dut_name]

    # TODO: Currently we only test 'telemetry' container which has the memory threshold 400MB
    # and number of vm_workers is hard coded. We will extend this testing on all containers after
    # the feature 'memory_checker' is fully implemented.
    container_name = "telemetry"
    vm_workers = 6

    pytest_require("Celestica-E1031" not in duthost.facts["hwsku"]
                   and (("20191130" in duthost.os_version and parse_version(duthost.os_version) > parse_version("20191130.72"))
                   or parse_version(duthost.kernel_version) > parse_version("4.9.0")),
                   "Test is not supported for platform Celestica E1031, 20191130.72 and older image versions!")

    # The container must be up before its memory is stressed; fail right away
    # otherwise.
    if not is_container_running(duthost, container_name):
        pytest.fail("'{}' is not running!".format(container_name))

    consumes_memory_and_checks_container_restart(duthost, container_name, vm_workers)
def test_setup_and_cleanup(duthosts, creds, enum_dut_feature_container,
                           enum_rand_one_per_hwsku_frontend_hostname, request):
    """Prepares the DuT before testing and cleans it up afterwards.

    Before yielding, the stress utility is installed, the Monit configuration
    files are backed up and customized with `request.param`, and the Monit
    service is restarted. After yielding, the Monit configuration files are
    restored, the Monit service and the container are restarted, the stress
    utility is removed and the critical processes are post-checked.

    NOTE(review): this generator is presumably registered as a parametrized
    pytest fixture by a decorator which is not part of this block -- confirm.

    Args:
        duthosts: The fixture returns list of DuTs.
        creds: Passed through to `install_stress_utility`.
        enum_dut_feature_container: Encoded DuT/container pair which is decoded
          by `decode_dut_and_container_name`.
        enum_rand_one_per_hwsku_frontend_hostname: The fixture randomly pick up
          a frontend DuT from testbed.
        request: Its `param` attribute is handed to `customize_monit_config_files`.

    Yields:
        None.
    """
    dut_name, container_name = decode_dut_and_container_name(enum_dut_feature_container)

    # Only run on the DuT which was picked for this parametrization.
    runs_on_chosen_dut = dut_name == enum_rand_one_per_hwsku_frontend_hostname
    pytest_require(
        runs_on_chosen_dut,
        "Skips testing memory_checker of container '{}' on the DuT '{}' since another DuT '{}' was chosen."
        .format(container_name, dut_name, enum_rand_one_per_hwsku_frontend_hostname))

    # Only the 'telemetry' container is exercised.
    is_telemetry = container_name == "telemetry"
    pytest_require(
        is_telemetry,
        "Skips testing memory_checker of container '{}' since memory monitoring is only enabled for 'telemetry'."
        .format(container_name))

    duthost = duthosts[dut_name]

    # --- Setup ---
    install_stress_utility(duthost, creds, container_name)
    backup_monit_config_files(duthost)
    customize_monit_config_files(duthost, request.param)
    restart_monit_service(duthost)

    yield

    # --- Cleanup ---
    restore_monit_config_files(duthost)
    restart_monit_service(duthost)
    restart_container(duthost, container_name)
    remove_stress_utility(duthost, container_name)
    postcheck_critical_processes(duthost, container_name)