Beispiel #1
0
        def initialize(self):
            try:
                if self._initialized:
                    return

                # check whether cgroup monitoring is supported on the current distro
                self._cgroups_supported = CGroupsApi.cgroups_supported()
                if not self._cgroups_supported:
                    logger.info("Cgroup monitoring is not supported on {0}", get_distro())
                    return

                # check that systemd is detected correctly
                self._cgroups_api = SystemdCgroupsApi()
                if not systemd.is_systemd():
                    _log_cgroup_warning("systemd was not detected on {0}", get_distro())
                    return

                _log_cgroup_info("systemd version: {0}", systemd.get_version())

                # This is temporarily disabled while we analyze telemetry. Likely it will be removed.
                # self.__collect_azure_unit_telemetry()
                # self.__collect_agent_unit_files_telemetry()

                if not self.__check_no_legacy_cgroups():
                    return

                agent_unit_name = systemd.get_agent_unit_name()
                agent_slice = systemd.get_unit_property(agent_unit_name, "Slice")
                if agent_slice not in (_AZURE_SLICE, "system.slice"):
                    _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
                    return

                self.__setup_azure_slice()

                cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
                self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice, cpu_controller_root, memory_controller_root)

                if self._agent_cpu_cgroup_path is not None:
                    _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
                    self.enable()
                    CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))

                _log_cgroup_info('Cgroups enabled: {0}', self._cgroups_enabled)

            except Exception as exception:
                _log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception))
            finally:
                self._initialized = True
Beispiel #2
0
    def test_cgroups_should_be_supported_only_on_ubuntu_16_and_later(self):
        test_cases = [
            (['ubuntu', '16.04', 'xenial'], True),
            (['ubuntu', '16.10', 'yakkety'], True),
            (['ubuntu', '18.04', 'bionic'], True),
            (['ubuntu', '18.10', 'cosmic'], True),
            (['ubuntu', '20.04', 'focal'], True),
            (['ubuntu', '20.10', 'groovy'], True),
            (['centos', '7.5', 'Source'], False),
            (['redhat', '7.7', 'Maipo'], False),
            (['redhat', '7.7.1908', 'Core'], False),
            (['bigip', '15.0.1', 'Final'], False),
            (['gaia', '273.562', 'R80.30'], False),
            (['debian', '9.1', ''], False),
        ]

        for (distro, supported) in test_cases:
            with patch("azurelinuxagent.common.cgroupapi.get_distro", return_value=distro):
                self.assertEqual(CGroupsApi.cgroups_supported(), supported, "cgroups_supported() failed on {0}".format(distro))
Beispiel #3
0
        def initialize(self):
            try:
                if self._initialized:
                    return

                #
                # check whether cgroup monitoring is supported on the current distro
                #
                self._cgroups_supported = CGroupsApi.cgroups_supported()
                if not self._cgroups_supported:
                    logger.info("Cgroup monitoring is not supported on {0}",
                                get_distro())
                    return

                #
                # check systemd
                #
                self._cgroups_api = CGroupsApi.create()

                if not isinstance(self._cgroups_api, SystemdCgroupsApi):
                    message = "systemd was not detected on {0}".format(
                        get_distro())
                    logger.warn(message)
                    add_event(op=WALAEventOperation.CGroupsInitialize,
                              is_success=False,
                              message=message,
                              log_event=False)
                    return

                def log_cgroup_info(format_string, *args):
                    message = format_string.format(*args)
                    logger.info(message)
                    add_event(op=WALAEventOperation.CGroupsInfo,
                              message=message)

                def log_cgroup_warn(format_string, *args):
                    message = format_string.format(*args)
                    logger.warn(message)
                    add_event(op=WALAEventOperation.CGroupsInfo,
                              message=message,
                              is_success=False,
                              log_event=False)

                log_cgroup_info("systemd version: {0}",
                                self._cgroups_api.get_systemd_version())

                #
                # Older versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent.  When running
                # under systemd this could produce invalid resource usage data. Do not enable cgroups under this condition.
                #
                legacy_cgroups = self._cgroups_api.cleanup_legacy_cgroups()

                if legacy_cgroups > 0:
                    log_cgroup_warn(
                        "The daemon's PID was added to a legacy cgroup; will not monitor resource usage."
                    )
                    return

                #
                # check v1 controllers
                #
                cpu_controller_root, memory_controller_root = self._cgroups_api.get_cgroup_mount_points(
                )

                if cpu_controller_root is not None:
                    logger.info("The CPU cgroup controller is mounted at {0}",
                                cpu_controller_root)
                else:
                    log_cgroup_warn("The CPU cgroup controller is not mounted")

                if memory_controller_root is not None:
                    logger.info(
                        "The memory cgroup controller is mounted at {0}",
                        memory_controller_root)
                else:
                    log_cgroup_warn(
                        "The memory cgroup controller is not mounted")

                #
                # check v2 controllers
                #
                cgroup2_mountpoint, cgroup2_controllers = self._cgroups_api.get_cgroup2_controllers(
                )
                if cgroup2_mountpoint is not None:
                    log_cgroup_warn(
                        "cgroups v2 mounted at {0}.  Controllers: [{1}]",
                        cgroup2_mountpoint, cgroup2_controllers)

                #
                # check the cgroups for the agent
                #
                agent_unit_name = self._cgroups_api.get_agent_unit_name()
                cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths(
                    "self")
                if cpu_cgroup_relative_path is None:
                    log_cgroup_warn(
                        "The agent's process is not within a CPU cgroup")
                else:
                    cpu_accounting = self._cgroups_api.get_unit_property(
                        agent_unit_name, "CPUAccounting")
                    log_cgroup_info('CPUAccounting: {0}', cpu_accounting)

                if memory_cgroup_relative_path is None:
                    log_cgroup_warn(
                        "The agent's process is not within a memory cgroup")
                else:
                    memory_accounting = self._cgroups_api.get_unit_property(
                        agent_unit_name, "MemoryAccounting")
                    log_cgroup_info('MemoryAccounting: {0}', memory_accounting)

                #
                # All good, enable cgroups and start monitoring the agent
                #
                self._cgroups_enabled = True

                if cpu_controller_root is None or cpu_cgroup_relative_path is None:
                    logger.info("Will not track CPU for the agent's cgroup")
                else:
                    self._agent_cpu_cgroup_path = os.path.join(
                        cpu_controller_root, cpu_cgroup_relative_path)
                    CGroupsTelemetry.track_cgroup(
                        CpuCgroup(agent_unit_name,
                                  self._agent_cpu_cgroup_path))

                if memory_controller_root is None or memory_cgroup_relative_path is None:
                    logger.info("Will not track memory for the agent's cgroup")
                else:
                    self._agent_memory_cgroup_path = os.path.join(
                        memory_controller_root, memory_cgroup_relative_path)
                    CGroupsTelemetry.track_cgroup(
                        MemoryCgroup(agent_unit_name,
                                     self._agent_memory_cgroup_path))

                log_cgroup_info("Agent cgroups: CPU: {0} -- MEMORY: {1}",
                                self._agent_cpu_cgroup_path,
                                self._agent_memory_cgroup_path)

            except Exception as e:
                message = "Error initializing cgroups: {0}".format(ustr(e))
                logger.warn(message)
                add_event(op=WALAEventOperation.CGroupsInitialize,
                          is_success=False,
                          message=message,
                          log_event=False)
            finally:
                self._initialized = True