Exemple #1
0
 def all_commands_running():
     """
     Snapshot the commands reported by shellutil and tell whether all the expected ones have started.

     The snapshot is stored in all_commands_running.running_commands so the caller can inspect it afterwards.
     """
     snapshot = shellutil.get_running_commands()
     all_commands_running.running_commands = snapshot
     running_count = len(snapshot)
     # +1 because run_pipe starts 2 commands
     expected_count = len(commands_to_execute) + 1
     return running_count >= expected_count
        def _check_processes_in_agent_cgroup(self):
            """
            Checks that every process in the agent's cgroup is one that is expected to be there.

            The expected processes are the current process (the extension handler), its parent (the daemon), the commands
            started using shellutil together with their descendants, and the instances of systemd-run used to start
            extensions on their own cgroup. Any other process (e.g. extensions, or services installed by extensions) should
            belong to its own cgroup and is reported as unexpected. Only a small sample (at most 5) of the unexpected
            processes is collected.

            Errors hit while collecting the data are logged as a warning and are not propagated; any unexpected processes
            found before the error are still reported.

            Raises a CGroupsException if unexpected processes were found
            """
            unexpected = []

            try:
                daemon_pid = os.getppid()
                handler_pid = os.getpid()
                tracked_commands = set(shellutil.get_running_commands())
                systemd_run_pids = set(self._cgroups_api.get_systemd_run_commands())
                cgroup_pids = CGroupsApi.get_processes_in_cgroup(self._agent_cpu_cgroup_path)
                # take a second snapshot of the running commands: some may have started or completed while the
                # processes in the cgroup were being fetched
                tracked_commands.update(shellutil.get_running_commands())
                systemd_run_pids.update(self._cgroups_api.get_systemd_run_commands())

                for pid in cgroup_pids:
                    # The daemon and the extension handler always belong to the agent's cgroup. So does systemd-run, which
                    # the agent uses to start extensions (the extensions themselves do not belong to the agent's cgroup).
                    if pid == daemon_pid or pid == handler_pid or pid in systemd_run_pids:
                        continue
                    # systemd_run_pids holds the shell that started systemd-run, so systemd-run itself is recognized
                    # by looking at its parent
                    if self._get_parent(pid) in systemd_run_pids and self._get_command(pid) == 'systemd-run':
                        continue
                    # walk up the process tree until reaching either a command started by the agent or the root (0)
                    ancestor = pid
                    while not (ancestor == 0 or ancestor in tracked_commands):
                        ancestor = self._get_parent(ancestor)
                    if ancestor != 0:
                        continue  # the process is, or descends from, a command started by the agent
                    unexpected.append(self.__format_process(pid))
                    if len(unexpected) >= 5:  # a small sample is enough
                        break
            except Exception as error:
                _log_cgroup_warning("Error checking the processes in the agent's cgroup: {0}".format(ustr(error)))

            if unexpected:
                raise CGroupsException("The agent's cgroup includes unexpected processes: {0}".format(unexpected))
Exemple #3
0
 def no_commands_running():
     """
     Snapshot the commands reported by shellutil and tell whether none is running.

     The snapshot is stored in no_commands_running.running_commands so the caller can inspect it afterwards.
     """
     snapshot = shellutil.get_running_commands()
     no_commands_running.running_commands = snapshot
     return len(snapshot) == 0
    def test_check_processes_in_agent_cgroup_should_raise_a_cgroups_exception_when_there_are_unexpected_processes_in_the_agent_cgroup(
            self):
        """
        Verifies that _check_processes_in_agent_cgroup raises a CGroupsException whose message lists exactly the
        processes that should not be in the agent's cgroup.

        The test starts two process trees using a helper bash script: one through shellutil.run_command (simulating a
        command executed by the agent) and one through configurator.start_extension_command (simulating an extension).
        It then patches CGroupsApi.get_processes_in_cgroup to return both the expected and the unexpected processes and
        checks the processes reported in the exception message.
        """
        with self._get_cgroup_configurator() as configurator:
            pass  # release the mocks used to create the test CGroupConfigurator so that they do not conflict with the mock Popen below

        # The test script recursively creates a given number of descendant processes, then it blocks until the
        # 'stop_file' exists. It produces an output file containing the PID of each descendant process.
        test_script = os.path.join(self.tmp_dir, "create_processes.sh")
        stop_file = os.path.join(self.tmp_dir, "create_processes.stop")
        AgentTestCase.create_script(
            test_script, """
#!/usr/bin/env bash
set -euo pipefail

if [[ $# != 2 ]]; then
    echo "Usage: $0 <output_file> <count>"
    exit 1
fi

echo $$ >> $1

if [[ $2 > 1 ]]; then
    $0 $1 $(($2 - 1))
else
    timeout 30s /usr/bin/env bash -c "while ! [[ -f {0} ]]; do sleep 0.25s; done"
fi

exit 0
""".format(stop_file))

        number_of_descendants = 3

        def wait_for_processes(processes_file):
            """
            Waits until 'processes_file' lists at least 'number_of_descendants' PIDs and returns them as a list of ints.

            Raises an Exception if wait_for gives up before all the PIDs show up.
            """
            def _all_present():
                """Re-reads the PIDs from the file (if it exists) and tells whether enough of them are present."""
                if os.path.exists(processes_file):
                    with open(processes_file, "r") as file_stream:
                        _all_present.processes = [
                            int(process)
                            for process in file_stream.read().split()
                        ]
                return len(_all_present.processes) >= number_of_descendants

            # the most recent list of PIDs is kept on the function object so that it can be reported on timeout and returned on success
            _all_present.processes = []

            if not wait_for(_all_present):
                raise Exception(
                    "Timeout waiting for processes. Expected {0}; got: {1}".
                    format(number_of_descendants,
                           format_processes(_all_present.processes)))

            return _all_present.processes

        # threads running the test processes; they are joined in the 'finally' block below
        threads = []

        try:
            #
            # Start the processes that will be used by the test. We use two sets of processes: the first set simulates a command executed by the agent
            # (e.g. iptables) and its child processes, if any. The second set of processes simulates an extension.
            #
            agent_command_output = os.path.join(self.tmp_dir,
                                                "agent_command.pids")
            agent_command = threading.Thread(
                target=lambda: shellutil.run_command([
                    test_script, agent_command_output,
                    str(number_of_descendants)
                ]))
            agent_command.start()
            threads.append(agent_command)
            agent_command_processes = wait_for_processes(agent_command_output)

            extension_output = os.path.join(self.tmp_dir, "extension.pids")

            def start_extension():
                """
                Starts the test script as an extension using configurator.start_extension_command, with Popen mocked to drop
                the systemd-run prefix. The PID of the process started in place of systemd-run is saved to
                start_extension.systemd_run_pid.
                """
                original_sleep = time.sleep
                original_popen = subprocess.Popen

                # Extensions are started using systemd-run; mock Popen to remove the call to systemd-run; the test script creates a couple of
                # child processes, which would simulate the extension's processes.
                def mock_popen(command, *args, **kwargs):
                    """Strips the systemd-run prefix (if any) from 'command' before invoking the original Popen."""
                    match = re.match(
                        r"^systemd-run --unit=[^\s]+ --scope --slice=[^\s]+ (.+)",
                        command)
                    is_systemd_run = match is not None
                    if is_systemd_run:
                        command = match.group(1)
                    process = original_popen(command, *args, **kwargs)
                    if is_systemd_run:
                        # this process stands in for systemd-run, so the test treats its PID as the systemd-run PID
                        start_extension.systemd_run_pid = process.pid
                    return process

                with patch(
                        'time.sleep', side_effect=lambda _: original_sleep(0.1)
                ):  # start_extension_command has a small delay; skip it
                    with patch(
                            "azurelinuxagent.common.cgroupapi.subprocess.Popen",
                            side_effect=mock_popen):
                        with tempfile.TemporaryFile(dir=self.tmp_dir,
                                                    mode="w+b") as stdout:
                            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                                        mode="w+b") as stderr:
                                configurator.start_extension_command(
                                    extension_name="TestExtension",
                                    command="{0} {1} {2}".format(
                                        test_script, extension_output,
                                        number_of_descendants),
                                    timeout=30,
                                    shell=True,
                                    cwd=self.tmp_dir,
                                    env={},
                                    stdout=stdout,
                                    stderr=stderr)

            # set by mock_popen once the extension command has been started
            start_extension.systemd_run_pid = None

            extension = threading.Thread(target=start_extension)
            extension.start()
            threads.append(extension)
            extension_processes = wait_for_processes(extension_output)

            #
            # check_processes_in_agent_cgroup uses shellutil and the cgroups api to get the commands that are currently running;
            # wait for all the processes to show up
            #
            if not wait_for(lambda: len(shellutil.get_running_commands(
            )) > 0 and len(configurator._cgroups_api.get_systemd_run_commands(
            )) > 0):
                raise Exception(
                    "Timeout while attempting to track the child commands")

            #
            # Verify that check_processes_in_agent_cgroup raises when there are unexpected processes in the agent's cgroup.
            #
            # For the agent's processes, we use the current process and its parent (in the actual agent these would be the daemon and the extension
            # handler), and the commands started by the agent.
            #
            # For other processes, we use process 1, a process that already completed, and an extension. Note that extensions are started using
            # systemd-run and the process for that command belongs to the agent's cgroup but the processes for the extension should be in a
            # different cgroup
            #
            def get_completed_process():
                """Returns a random integer between 1000 and 10000 for which no /proc/<pid> entry exists."""
                random.seed()
                completed = random.randint(1000, 10000)
                while os.path.exists(
                        "/proc/{0}".format(completed)
                ):  # ensure we do not use an existing process
                    completed = random.randint(1000, 10000)
                return completed

            # processes that are expected in the agent's cgroup and hence should not be reported
            agent_processes = [
                os.getppid(), os.getpid()
            ] + agent_command_processes + [start_extension.systemd_run_pid]
            # processes that should be reported as unexpected
            other_processes = [1, get_completed_process()
                               ] + extension_processes

            with patch(
                    "azurelinuxagent.common.cgroupconfigurator.CGroupsApi.get_processes_in_cgroup",
                    return_value=agent_processes + other_processes):
                with self.assertRaises(CGroupsException) as context_manager:
                    configurator._check_processes_in_agent_cgroup()

                # The list of processes in the message is an array of strings: "['foo', ..., 'bar']"
                message = ustr(context_manager.exception)
                search = re.search(
                    r'unexpected processes: \[(?P<processes>.+)\]', message)
                self.assertIsNotNone(
                    search,
                    "The event message is not in the expected format: {0}".
                    format(message))
                reported = search.group('processes').split(',')

                # exactly the unexpected processes must be reported: same count, and each PID present
                self.assertEqual(
                    len(other_processes), len(reported),
                    "An incorrect number of processes was reported. Expected: {0} Got: {1}"
                    .format(format_processes(other_processes), reported))
                for pid in other_processes:
                    self.assertTrue(
                        any("[PID: {0}]".format(pid) in reported_process
                            for reported_process in reported),
                        "Process {0} was not reported. Got: {1}".format(
                            format_processes([pid]), reported))
        finally:
            # create the file that stops the test processes and wait for them to complete
            open(stop_file, "w").close()
            for thread in threads:
                thread.join(timeout=5)