def ensure_process_is_running(duthost, container_name, critical_process):
    """Check whether a critical process is running and start it if it is not.

    The current status is read through `get_program_info`. When the status is
    anything other than "RUNNING", the process is started by running
    `supervisorctl start` inside the container via `docker exec`.

    Args:
        duthost: DUT host object; its `shell` method is used to run the
            start command.
        container_name: A string shows name of a container.
        critical_process: A string shows name of a process.

    Returns:
        None.

    Raises:
        Calls `pytest.fail` when the start command reports a non-zero "rc".
        NOTE(review): whether `duthost.shell` itself raises on a non-zero
        exit code is not visible from this function - confirm.
    """
    logger.info(
        "Checking whether process '{}' in container '{}' is running...".format(
            critical_process, container_name))

    # Only the status is needed here; the PID returned alongside it is ignored.
    program_status, _ = get_program_info(duthost, container_name, critical_process)

    if program_status == "RUNNING":
        logger.info("Process '{}' in container '{}' is running.".format(
            critical_process, container_name))
        return

    logger.info(
        "Process '{}' in container '{}' is not running and start it...".format(
            critical_process, container_name))
    command_output = duthost.shell(
        "docker exec {} supervisorctl start {}".format(
            container_name, critical_process))

    if command_output["rc"] != 0:
        pytest.fail(
            "Failed to start process '{}' in container '{}'.".format(
                critical_process, container_name))

    logger.info("Process '{}' in container '{}' is started.".format(
        critical_process, container_name))
def is_tunnel_packet_handler_running(duthost):
    """Report whether tunnel_packet_handler is running on the DUT.

    The status of the "tunnel_packet_handler" program in the "swss"
    container is looked up through `get_program_info`.

    Args:
        duthost (AnsibleHost): Device Under Test (DUT)

    Returns:
        bool: True if the reported status is 'RUNNING'. Otherwise, False.
    """
    # The second element of the returned pair is not needed for this check.
    handler_status, _unused = get_program_info(
        duthost, "swss", "tunnel_packet_handler")
    is_running = handler_status == "RUNNING"
    return is_running
def stop_critical_processes(duthost, containers_in_namespaces):
    """Stop the critical processes of each container in each of its namespaces.

    For every container the critical group and process lists are fetched once
    through `duthost.get_critical_group_and_process_lists`. Then, for every
    namespace the container resides in, each critical process and each
    program of each critical group is handed, together with its status and
    PID, to `check_and_kill_process`.

    Args:
        duthost: DUT host object; its
            `get_critical_group_and_process_lists` method is called here.
        containers_in_namespaces: A dictionary where keys are container names
            and values are lists which contains ids of namespaces this
            container should reside in.

    Returns:
        None.
    """
    for container_name, namespace_ids in containers_in_namespaces.items():
        # If a container is only running on host, then namespace_ids is [None]
        # If a container is running on multi-ASIC, then namespace_ids is [0, 1, ...]
        # If a container is running on host and multi-ASICs, then namespace_ids
        # is [None, 0, 1, ...]
        # With two or more entries, the second id is used as the name suffix
        # when querying the critical group and process lists.
        if len(namespace_ids) >= 2:
            lookup_name = container_name + namespace_ids[1]
        else:
            lookup_name = container_name

        lookup_result = duthost.get_critical_group_and_process_lists(lookup_name)
        critical_group_list, critical_process_list, succeeded = lookup_result
        failure_message = "Failed to get critical group and process lists of container '{}'".format(
            lookup_name)
        pytest_assert(succeeded, failure_message)

        for namespace_id in namespace_ids:
            # Only non-default namespaces get their id appended to the name.
            if namespace_id != DEFAULT_ASIC_ID:
                target_container = container_name + namespace_id
            else:
                target_container = container_name

            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove this skip once the issue was solved in the image.
                is_unmanaged_dsserve = (
                    "syncd" in target_container and critical_process == "dsserve")
                if is_unmanaged_dsserve:
                    continue

                process_status, process_pid = get_program_info(
                    duthost, target_container, critical_process)
                check_and_kill_process(
                    duthost, target_container, critical_process,
                    process_status, process_pid)

            for critical_group in critical_group_list:
                programs_in_group = get_group_program_info(
                    duthost, target_container, critical_group)
                for program_name in programs_in_group:
                    # Group members are addressed as "<group>:<program>".
                    qualified_name = critical_group + ":" + program_name
                    member_status = programs_in_group[program_name][0]
                    member_pid = programs_in_group[program_name][1]
                    check_and_kill_process(
                        duthost, target_container, qualified_name,
                        member_status, member_pid)