Example #1
0
    def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied(self, mock_read_file,
                                                                                    patch_periodic_warn, *args):
        num_extensions = 1
        num_controllers = 2
        is_active_check_per_controller = 2

        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        self.assertEqual(0, patch_periodic_warn.call_count)

        # Expecting logs to be present for different kind of errors
        io_error_3 = IOError()
        io_error_3.errno = errno.EPERM
        mock_read_file.side_effect = io_error_3

        poll_count = 1
        expected_count_per_call = num_controllers + is_active_check_per_controller
        # each collect per controller would generate a log statement, and each cgroup would invoke a
        # is active check raising an exception

        for data_count in range(poll_count, 10):
            CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(poll_count * expected_count_per_call, patch_periodic_warn.call_count)
Example #2
0
    def test_cleanup_legacy_cgroups_should_disable_cgroups_when_the_daemon_was_added_to_the_legacy_cgroup_on_systemd(self, _):
        # Set up a mock /var/run/waagent.pid file
        daemon_pid = "42"
        daemon_pid_file = os.path.join(self.tmp_dir, "waagent.pid")
        fileutil.write_file(daemon_pid_file, daemon_pid + "\n")

        # Set up old controller cgroups and add the daemon PID to them
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "cpu", daemon_pid)
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "memory", daemon_pid)

        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

        cgroup_configurator = CGroupConfigurator.get_instance()

        with patch("azurelinuxagent.common.cgroupconfigurator.add_event") as mock_add_event:
            with patch("azurelinuxagent.common.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file):
                    cgroup_configurator.cleanup_legacy_cgroups()

        self.assertEquals(len(mock_add_event.call_args_list), 1)
        _, kwargs = mock_add_event.call_args_list[0]
        self.assertEquals(kwargs['op'], 'CGroupsCleanUp')
        self.assertFalse(kwargs['is_success'])
        self.assertEquals(
            kwargs['message'],
            "Failed to process legacy cgroups. Collection of resource usage data will be disabled. [CGroupsException] The daemon's PID ({0}) was already added to the legacy cgroup; this invalidates resource usage data.".format(daemon_pid))

        self.assertFalse(cgroup_configurator.enabled())
        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
Example #3
0
    def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
        num_extensions = 5

        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    patch_is_active.return_value = True
                    patch_get_memory_usage.side_effect = Exception("File not found")

                    current_cpu = 30
                    patch_get_cpu_percent.return_value = current_cpu

                    poll_count = 1

                    for data_count in range(poll_count, 10):
                        CGroupsTelemetry.poll_all_tracked()

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)
                        self._assert_cgroup_metrics_equal(cpu_usage=[current_cpu] * data_count, memory_usage=[], max_memory_usage=[])

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)
                    self._assert_cgroup_metrics_equal([], [], [])
Example #4
0
    def exercise_telemetry_instantiation(self, test_cgroup):
        test_extension_name = test_cgroup.name
        CGroupsTelemetry.track_cgroup(test_cgroup)
        self.assertIn('cpu', test_cgroup.cgroups)
        self.assertIn('memory', test_cgroup.cgroups)
        self.assertTrue(CGroupsTelemetry.is_tracked(test_extension_name))
        consume_cpu_time()
        time.sleep(1)
        metrics, limits = CGroupsTelemetry.report_all_tracked()
        my_metrics = metrics[test_extension_name]
        self.assertEqual(len(my_metrics), 2)
        for item in my_metrics:
            metric_family, metric_name, metric_value = item
            if metric_family == "Process":
                self.assertEqual(metric_name, "% Processor Time")
                self.assertGreater(metric_value, 0.0)
            elif metric_family == "Memory":
                self.assertEqual(metric_name, "Total Memory Usage")
                self.assertGreater(metric_value, 100000)
            else:
                self.fail("Unknown metric {0}/{1} value {2}".format(
                    metric_family, metric_name, metric_value))

        my_limits = limits[test_extension_name]
        self.assertIsInstance(my_limits,
                              CGroupsLimits,
                              msg="is not the correct instance")
        self.assertGreater(my_limits.cpu_limit, 0.0)
        self.assertGreater(my_limits.memory_limit, 0.0)
Example #5
0
    def test_generate_extension_metrics_telemetry_dictionary(self, *args):  # pylint: disable=unused-argument
        num_polls = 10
        num_extensions = 1

        cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

        # only verifying calculations and not validity of the values.
        memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
        max_memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]

        # no need to initialize the CPU usage, since we mock get_cpu_usage() below
        with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
            for i in range(num_extensions):
                dummy_cpu_cgroup = CpuCgroup("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
                CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

                dummy_memory_cgroup = MemoryCgroup("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
                CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                        for i in range(num_polls):
                            patch_is_active.return_value = True
                            patch_get_cpu_usage.return_value = cpu_percent_values[i]
                            patch_get_memory_usage.return_value = memory_usage_values[i]  # example 200 MB
                            patch_get_memory_max_usage.return_value = max_memory_usage_values[i]  # example 450 MB
                            CGroupsTelemetry.poll_all_tracked()
Example #6
0
 def track_cgroups(extension_cgroups):
     try:
         for cgroup in extension_cgroups:
             CGroupsTelemetry.track_cgroup(cgroup)
     except Exception as e:
         logger.warn("Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. "
                     "Error: {1}".format(cgroup.path, ustr(e)))
            def __impl():
                cgroups = self._cgroups_api.create_agent_cgroups()

                if track_cgroups:
                    for cgroup in cgroups:
                        CGroupsTelemetry.track_cgroup(cgroup)

                return cgroups
    def _track_new_extension_cgroups(num_extensions):
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)
    def test_telemetry_polling_with_inactive_cgroups(self, *args):
        num_extensions = 5
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as patch_get_memory_max_usage:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                        patch_is_active.return_value = False

                        no_extensions_expected = 0
                        data_count = 1
                        current_cpu = 30
                        current_memory = 209715200
                        current_max_memory = 471859200

                        patch_get_cpu_percent.return_value = current_cpu
                        patch_get_memory_usage.return_value = current_memory  # example 200 MB
                        patch_get_memory_max_usage.return_value = current_max_memory  # example 450 MB

                        for i in range(num_extensions):
                            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
                            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

                        CGroupsTelemetry.poll_all_tracked()

                        for i in range(num_extensions):
                            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
                            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)

                        for cgroup_name, cgroup_metric in CGroupsTelemetry._cgroup_metrics.items():
                            current_memory_usage, max_memory_levels, current_cpu_usage = cgroup_metric.get_metrics()

                            self.assertEqual(len(current_memory_usage._data), data_count)
                            self.assertListEqual(current_memory_usage._data, [current_memory] * data_count)
                            self.assertEqual(len(max_memory_levels._data), data_count)
                            self.assertListEqual(max_memory_levels._data, [current_max_memory] * data_count)
                            self.assertEqual(len(current_cpu_usage._data), data_count)
                            self.assertListEqual(current_cpu_usage._data, [current_cpu] * data_count)

                        CGroupsTelemetry.report_all_tracked()

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), no_extensions_expected)
                        for cgroup_name, cgroup_metric in CGroupsTelemetry._cgroup_metrics.items():
                            current_memory_usage, max_memory_levels, current_cpu_usage = cgroup_metric.get_metrics()
                            self.assertEqual(len(current_memory_usage._data), 0)
                            self.assertEqual(len(max_memory_levels._data), 0)
                            self.assertEqual(len(current_cpu_usage._data), 0)
Example #10
0
    def test_telemetry_polling_with_changing_cgroups_state(self, *args):
        num_extensions = 5
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                        patch_is_active.return_value = True

                        no_extensions_expected = 0
                        expected_data_count = 2

                        current_cpu = 30
                        current_memory = 209715200
                        current_max_memory = 471859200

                        patch_get_cpu_percent.return_value = current_cpu
                        patch_get_memory_usage.return_value = current_memory  # example 200 MB
                        patch_get_memory_max_usage.return_value = current_max_memory  # example 450 MB

                        for i in range(num_extensions):
                            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
                            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

                        CGroupsTelemetry.poll_all_tracked()

                        for i in range(num_extensions):
                            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
                            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)

                        patch_is_active.return_value = False
                        CGroupsTelemetry.poll_all_tracked()

                        for i in range(num_extensions):
                            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
                            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)
                        self._assert_cgroup_metrics_equal(
                            cpu_usage=[current_cpu] * expected_data_count,
                            memory_usage=[current_memory] * expected_data_count,
                            max_memory_usage=[current_max_memory] * expected_data_count)

                        CGroupsTelemetry.report_all_tracked()

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), no_extensions_expected)
                        self._assert_cgroup_metrics_equal([], [], [])
Example #11
0
    def test_cleanup_legacy_cgroups_should_disable_cgroups_when_it_fails_to_process_legacy_cgroups(
            self):
        # Set up a mock /var/run/waagent.pid file
        daemon_pid = "42"
        daemon_pid_file = os.path.join(self.tmp_dir, "waagent.pid")
        fileutil.write_file(daemon_pid_file, daemon_pid + "\n")

        # Set up old controller cgroups and add the daemon PID to them
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root,
                                                "cpu", daemon_pid)
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root,
                                                "memory", daemon_pid)

        # Set up new controller cgroups and add extension handler's PID to them
        CGroupsTools.create_agent_cgroup(self.cgroups_file_system_root, "cpu",
                                         "999")
        CGroupsTools.create_agent_cgroup(self.cgroups_file_system_root,
                                         "memory", "999")

        def mock_append_file(filepath, contents, **kwargs):
            if re.match(r'/.*/cpu/.*/cgroup.procs', filepath):
                raise OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))
            fileutil.append_file(filepath, contents, **kwargs)

        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "cpu"))
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "memory"))

        cgroup_configurator = CGroupConfigurator.get_instance()

        with patch("azurelinuxagent.common.cgroupconfigurator.add_event"
                   ) as mock_add_event:
            with patch(
                    "azurelinuxagent.common.cgroupapi.get_agent_pid_file_path",
                    return_value=daemon_pid_file):
                with patch(
                        "azurelinuxagent.common.cgroupapi.fileutil.append_file",
                        side_effect=mock_append_file):
                    cgroup_configurator.cleanup_legacy_cgroups()

        self.assertEquals(len(mock_add_event.call_args_list), 1)
        _, kwargs = mock_add_event.call_args_list[0]
        self.assertEquals(kwargs['op'], 'CGroupsCleanUp')
        self.assertFalse(kwargs['is_success'])
        self.assertEquals(
            kwargs['message'],
            'Failed to process legacy cgroups. Collection of resource usage data will be disabled. [Errno 28] No space left on device'
        )

        self.assertFalse(cgroup_configurator.enabled())
        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
Example #12
0
    def test_disable_should_reset_tracked_cgroups(self):
        configurator = CGroupConfigurator.get_instance()

        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

        configurator.disable()

        self.assertFalse(configurator.enabled())
        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
Example #13
0
    def test_telemetry_calculations(self, *args):
        num_polls = 10
        num_extensions = 1
        num_summarization_values = 7

        cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

        # only verifying calculations and not validity of the values.
        memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
        max_memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]

        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                        for i in range(num_polls):
                            patch_is_active.return_value = True
                            patch_get_cpu_percent.return_value = cpu_percent_values[i]
                            patch_get_memory_usage.return_value = memory_usage_values[i]  # example 200 MB
                            patch_get_memory_max_usage.return_value = max_memory_usage_values[i]  # example 450 MB
                            CGroupsTelemetry.poll_all_tracked()

        collected_metrics = CGroupsTelemetry.report_all_tracked()
        for i in range(num_extensions):
            name = "dummy_extension_{0}".format(i)

            self.assertIn(name, collected_metrics)
            self.assertIn("memory", collected_metrics[name])
            self.assertIn("cur_mem", collected_metrics[name]["memory"])
            self.assertIn("max_mem", collected_metrics[name]["memory"])
            self.assertEqual(num_summarization_values, len(collected_metrics[name]["memory"]["cur_mem"]))
            self.assertEqual(num_summarization_values, len(collected_metrics[name]["memory"]["max_mem"]))

            self.assertListEqual(generate_metric_list(memory_usage_values),
                                 collected_metrics[name]["memory"]["cur_mem"][0:5])
            self.assertListEqual(generate_metric_list(max_memory_usage_values),
                                 collected_metrics[name]["memory"]["max_mem"][0:5])

            self.assertIn("cpu", collected_metrics[name])
            self.assertIn("cur_cpu", collected_metrics[name]["cpu"])
            self.assertEqual(num_summarization_values, len(collected_metrics[name]["cpu"]["cur_cpu"]))
            self.assertListEqual(generate_metric_list(cpu_percent_values),
                                 collected_metrics[name]["cpu"]["cur_cpu"][0:5])
    def test_disable_should_reset_tracked_cgroups(self):
        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "cpu"))
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "memory"))

        CGroupConfiguratorSystemdTestCase._get_new_cgroup_configurator_instance(
        ).disable()

        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
        def start_tracking_unit_cgroups(self, unit_name):
            """
            TODO: Start tracking Memory Cgroups
            """
            try:
                cpu_cgroup_path, _ = self._cgroups_api.get_unit_cgroup_paths(unit_name)

                if cpu_cgroup_path is None:
                    logger.info("The CPU controller is not mounted; will not track resource usage")
                else:
                    CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path))

            except Exception as exception:
                logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(exception))
    def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
        num_extensions = 5
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as patch_get_memory_max_usage:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup.collect") as patch_cpu_cgroup_collect:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                        patch_is_active.return_value = True

                        patch_cpu_cgroup_collect.side_effect = Exception("File not found")

                        current_memory = 209715200
                        current_max_memory = 471859200

                        patch_get_memory_usage.return_value = current_memory  # example 200 MB
                        patch_get_memory_max_usage.return_value = current_max_memory  # example 450 MB

                        poll_count = 1

                        for data_count in range(poll_count, 10):
                            CGroupsTelemetry.poll_all_tracked()
                            self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)

                            for cgroup_name, cgroup_metric in CGroupsTelemetry._cgroup_metrics.items():
                                current_memory_usage, max_memory_levels, current_cpu_usage = cgroup_metric.get_metrics()
                                self.assertEqual(len(current_memory_usage._data), data_count)
                                self.assertListEqual(current_memory_usage._data, [current_memory] * data_count)
                                self.assertEqual(len(max_memory_levels._data), data_count)
                                self.assertListEqual(max_memory_levels._data, [current_max_memory] * data_count)

                                self.assertEqual(len(current_cpu_usage._data), 0)
                                self.assertListEqual(current_cpu_usage._data, [])

                        CGroupsTelemetry.report_all_tracked()

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)
                        for cgroup_name, cgroup_metric in CGroupsTelemetry._cgroup_metrics.items():
                            current_memory_usage, max_memory_levels, current_cpu_usage = cgroup_metric.get_metrics()
                            self.assertEqual(len(current_memory_usage._data), 0)
                            self.assertEqual(len(max_memory_levels._data), 0)
                            self.assertEqual(len(current_cpu_usage._data), 0)
Example #17
0
        def initialize(self):
            try:
                if self._initialized:
                    return

                # check whether cgroup monitoring is supported on the current distro
                self._cgroups_supported = CGroupsApi.cgroups_supported()
                if not self._cgroups_supported:
                    logger.info("Cgroup monitoring is not supported on {0}", get_distro())
                    return

                # check that systemd is detected correctly
                self._cgroups_api = SystemdCgroupsApi()
                if not systemd.is_systemd():
                    _log_cgroup_warning("systemd was not detected on {0}", get_distro())
                    return

                _log_cgroup_info("systemd version: {0}", systemd.get_version())

                # This is temporarily disabled while we analyze telemetry. Likely it will be removed.
                # self.__collect_azure_unit_telemetry()
                # self.__collect_agent_unit_files_telemetry()

                if not self.__check_no_legacy_cgroups():
                    return

                agent_unit_name = systemd.get_agent_unit_name()
                agent_slice = systemd.get_unit_property(agent_unit_name, "Slice")
                if agent_slice not in (_AZURE_SLICE, "system.slice"):
                    _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
                    return

                self.__setup_azure_slice()

                cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
                self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice, cpu_controller_root, memory_controller_root)

                if self._agent_cpu_cgroup_path is not None:
                    _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
                    self.enable()
                    CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))

                _log_cgroup_info('Cgroups enabled: {0}', self._cgroups_enabled)

            except Exception as exception:
                _log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception))
            finally:
                self._initialized = True
Example #18
0
    def test_cgroup_is_tracked(self, *args):
        num_extensions = 5
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory", "dummy_extension_{0}".
                                                format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        for i in range(num_extensions):
            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

        self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path"))
        self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path"))
Example #19
0
    def test_cgroup_tracking(self, *args):
        num_extensions = 5
        num_controllers = 2
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        for i in range(num_extensions):
            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
            self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

        self.assertEqual(num_extensions * num_controllers, len(CGroupsTelemetry._tracked))
Example #20
0
    def test_telemetry_polling_to_generate_transient_logs_index_error(self, mock_read_file, *args):
        num_extensions = 1
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        # Generating a different kind of error (non-IOError) to check the logging.
        # Trying to invoke IndexError during the getParameter call
        mock_read_file.return_value = ''

        with patch("azurelinuxagent.common.logger.periodic_warn") as patch_periodic_warn:
            expected_call_count = 1  # called only once at start, and then gets removed from the tracked data.
            for data_count in range(1, 10):
                CGroupsTelemetry.poll_all_tracked()
                self.assertEqual(expected_call_count, patch_periodic_warn.call_count)
Example #21
0
    def test_telemetry_polling_to_not_generate_transient_logs_ioerror_file_not_found(self, mock_read_file,
                                                                                     patch_periodic_warn, *args):
        num_extensions = 1
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        self.assertEqual(0, patch_periodic_warn.call_count)

        # Not expecting logs present for io_error with errno=errno.ENOENT
        io_error_2 = IOError()
        io_error_2.errno = errno.ENOENT
        mock_read_file.side_effect = io_error_2

        poll_count = 1
        for data_count in range(poll_count, 10):
            CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(0, patch_periodic_warn.call_count)
Example #22
0
        def start_extension_command(self, extension_name, command, shell, cwd,
                                    env, stdout, stderr):
            """
            Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
            :param extension_name: The extension executing the command
            :param command: The command to invoke
            :param cwd: The working directory for the command
            :param env:  The environment to pass to the command's process
            :param stdout: File object to redirect stdout to
            :param stderr: File object to redirect stderr to
            """
            if not self.enabled():
                process = subprocess.Popen(command,
                                           shell=shell,
                                           cwd=cwd,
                                           env=env,
                                           stdout=stdout,
                                           stderr=stderr,
                                           preexec_fn=os.setsid)
            else:
                process, extension_cgroups = self._cgroups_api.start_extension_command(
                    extension_name,
                    command,
                    shell=shell,
                    cwd=cwd,
                    env=env,
                    stdout=stdout,
                    stderr=stderr)

                try:
                    for cgroup in extension_cgroups:
                        CGroupsTelemetry.track_cgroup(cgroup)
                except Exception as e:
                    logger.warn(
                        "Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. Error: {1}"
                        .format(cgroup.path, ustr(e)))

            return process
Example #23
0
    def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args):
        num_extensions = 5
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric") as \
                patch_process_cgroup_metric:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:

                patch_is_active.return_value = False
                patch_process_cgroup_metric.return_value = {}
                poll_count = 1

                for data_count in range(poll_count, 10):
                    CGroupsTelemetry.poll_all_tracked()

                collected_metrics = CGroupsTelemetry.report_all_tracked()
                self.assertEqual(0, len(collected_metrics))
Example #24
0
    def test_process_cgroup_metric_with_incorrect_cgroups_mounted(self, *args):
        num_extensions = 5
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
                patch_get_cpu_usage.side_effect = Exception("File not found")
                patch_get_memory_usage.side_effect = Exception("File not found")

                for data_count in range(1, 10):
                    CGroupsTelemetry.poll_all_tracked()

                self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)

                collected_metrics = {}
                for name, cgroup_metrics in CGroupsTelemetry._cgroup_metrics.items():
                    collected_metrics[name] = CGroupsTelemetry._process_cgroup_metric(cgroup_metrics)
                    self.assertEqual(collected_metrics[name], {})  # empty
Example #25
0
    def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr,
                                error_code=ExtensionErrorCodes.PluginUnknownFailure):
        scope = "{0}_{1}".format(self._get_extension_cgroup_name(extension_name), uuid.uuid4())

        process = subprocess.Popen(
            "systemd-run --unit={0} --scope {1}".format(scope, command),
            shell=shell,
            cwd=cwd,
            stdout=stdout,
            stderr=stderr,
            env=env,
            preexec_fn=os.setsid)

        scope_name = scope + '.scope'

        logger.info("Started extension in unit '{0}'", scope_name)

        try:
            # systemd-run creates the scope under the system slice by default
            cgroup_relative_path = os.path.join('system.slice', scope_name)

            cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_cgroup_mount_points()

            if cpu_cgroup_mountpoint is None:
                logger.info("The CPU controller is not mounted; will not track resource usage")
            else:
                cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
                CGroupsTelemetry.track_cgroup(CpuCgroup(extension_name, cpu_cgroup_path))

            if memory_cgroup_mountpoint is None:
                logger.info("The memory controller is not mounted; will not track resource usage")
            else:
                memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
                CGroupsTelemetry.track_cgroup(MemoryCgroup(extension_name, memory_cgroup_path))

        except IOError as e:
            if e.errno == 2:  # 'No such file or directory'
                logger.info("The extension command already completed; will not track resource usage")
            logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))
        except Exception as e:
            logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))

        # Wait for process completion or timeout
        try:
            process_output = handle_process_completion(process=process,
                                                       command=command,
                                                       timeout=timeout,
                                                       stdout=stdout,
                                                       stderr=stderr,
                                                       error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            process_output = read_output(stdout, stderr)
            systemd_failure = self._is_systemd_failure(scope, process_output)

            if not systemd_failure:
                # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
                raise
            else:
                # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
                err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output) \
                    if isinstance(e, ExtensionOperationError) else "Systemd timed-out, output: %s" % process_output
                event_msg = 'Failed to run systemd-run for unit {0}.scope. ' \
                            'Will retry invoking the extension without systemd. ' \
                            'Systemd-run error: {1}'.format(scope, err_msg)
                add_event(op=WALAEventOperation.InvokeCommandUsingSystemd, is_success=False, log_event=False, message=event_msg)
                logger.warn(event_msg)

                # Reset the stdout and stderr
                stdout.truncate(0)
                stderr.truncate(0)

                # Try invoking the process again, this time without systemd-run
                logger.info('Extension invocation using systemd failed, falling back to regular invocation '
                            'without cgroups tracking.')
                process = subprocess.Popen(command,
                                           shell=shell,
                                           cwd=cwd,
                                           env=env,
                                           stdout=stdout,
                                           stderr=stderr,
                                           preexec_fn=os.setsid)

                process_output = handle_process_completion(process=process,
                                                           command=command,
                                                           timeout=timeout,
                                                           stdout=stdout,
                                                           stderr=stderr,
                                                           error_code=error_code)

                return process_output

        # The process terminated in time and successfully
        return process_output
Example #26
0
    def start_extension_command(
            self,
            extension_name,
            command,
            cmd_name,
            timeout,
            shell,
            cwd,
            env,
            stdout,
            stderr,
            error_code=ExtensionErrorCodes.PluginUnknownFailure):
        scope = "{0}_{1}".format(cmd_name, uuid.uuid4())
        extension_slice_name = self.get_extension_slice_name(extension_name)
        with self._systemd_run_commands_lock:
            process = subprocess.Popen(  # pylint: disable=W1509
                "systemd-run --unit={0} --scope --slice={1} {2}".format(
                    scope, extension_slice_name, command),
                shell=shell,
                cwd=cwd,
                stdout=stdout,
                stderr=stderr,
                env=env,
                preexec_fn=os.setsid)

            # We start systemd-run with shell == True so process.pid is the shell's pid, not the pid for systemd-run
            self._systemd_run_commands.append(process.pid)

        scope_name = scope + '.scope'

        logger.info("Started extension in unit '{0}'", scope_name)

        try:
            cgroup_relative_path = os.path.join(
                'azure.slice/azure-vmextensions.slice', extension_slice_name)

            cpu_cgroup_mountpoint, _ = self.get_cgroup_mount_points()

            if cpu_cgroup_mountpoint is None:
                logger.info(
                    "The CPU controller is not mounted; will not track resource usage"
                )
            else:
                cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint,
                                               cgroup_relative_path)
                CGroupsTelemetry.track_cgroup(
                    CpuCgroup(extension_name, cpu_cgroup_path))

        except IOError as e:
            if e.errno == 2:  # 'No such file or directory'
                logger.info(
                    "The extension command already completed; will not track resource usage"
                )
            logger.info(
                "Failed to start tracking resource usage for the extension: {0}",
                ustr(e))
        except Exception as e:
            logger.info(
                "Failed to start tracking resource usage for the extension: {0}",
                ustr(e))

        # Wait for process completion or timeout
        try:
            return handle_process_completion(process=process,
                                             command=command,
                                             timeout=timeout,
                                             stdout=stdout,
                                             stderr=stderr,
                                             error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            if not self._is_systemd_failure(scope, stderr):
                # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
                raise

            # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
            process_output = read_output(stdout, stderr)
            # Reset the stdout and stderr
            stdout.truncate(0)
            stderr.truncate(0)

            if isinstance(e, ExtensionOperationError):
                # no-member: Instance of 'ExtensionError' has no 'exit_code' member (no-member) - Disabled: e is actually an ExtensionOperationError
                err_msg = 'Systemd process exited with code %s and output %s' % (
                    e.exit_code, process_output)  # pylint: disable=no-member
            else:
                err_msg = "Systemd timed-out, output: %s" % process_output
            raise SystemdRunError(err_msg)
        finally:
            with self._systemd_run_commands_lock:
                self._systemd_run_commands.remove(process.pid)
Example #27
0
        def initialize(self):
            try:
                if self._initialized:
                    return

                #
                # check whether cgroup monitoring is supported on the current distro
                #
                self._cgroups_supported = CGroupsApi.cgroups_supported()
                if not self._cgroups_supported:
                    logger.info("Cgroup monitoring is not supported on {0}",
                                get_distro())
                    return

                #
                # check systemd
                #
                self._cgroups_api = CGroupsApi.create()

                if not isinstance(self._cgroups_api, SystemdCgroupsApi):
                    message = "systemd was not detected on {0}".format(
                        get_distro())
                    logger.warn(message)
                    add_event(op=WALAEventOperation.CGroupsInitialize,
                              is_success=False,
                              message=message,
                              log_event=False)
                    return

                def log_cgroup_info(format_string, *args):
                    message = format_string.format(*args)
                    logger.info(message)
                    add_event(op=WALAEventOperation.CGroupsInfo,
                              message=message)

                def log_cgroup_warn(format_string, *args):
                    message = format_string.format(*args)
                    logger.warn(message)
                    add_event(op=WALAEventOperation.CGroupsInfo,
                              message=message,
                              is_success=False,
                              log_event=False)

                log_cgroup_info("systemd version: {0}",
                                self._cgroups_api.get_systemd_version())

                #
                # Older versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent.  When running
                # under systemd this could produce invalid resource usage data. Do not enable cgroups under this condition.
                #
                legacy_cgroups = self._cgroups_api.cleanup_legacy_cgroups()

                if legacy_cgroups > 0:
                    log_cgroup_warn(
                        "The daemon's PID was added to a legacy cgroup; will not monitor resource usage."
                    )
                    return

                #
                # check v1 controllers
                #
                cpu_controller_root, memory_controller_root = self._cgroups_api.get_cgroup_mount_points(
                )

                if cpu_controller_root is not None:
                    logger.info("The CPU cgroup controller is mounted at {0}",
                                cpu_controller_root)
                else:
                    log_cgroup_warn("The CPU cgroup controller is not mounted")

                if memory_controller_root is not None:
                    logger.info(
                        "The memory cgroup controller is mounted at {0}",
                        memory_controller_root)
                else:
                    log_cgroup_warn(
                        "The memory cgroup controller is not mounted")

                #
                # check v2 controllers
                #
                cgroup2_mountpoint, cgroup2_controllers = self._cgroups_api.get_cgroup2_controllers(
                )
                if cgroup2_mountpoint is not None:
                    log_cgroup_warn(
                        "cgroups v2 mounted at {0}.  Controllers: [{1}]",
                        cgroup2_mountpoint, cgroup2_controllers)

                #
                # check the cgroups for the agent
                #
                agent_unit_name = self._cgroups_api.get_agent_unit_name()
                cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths(
                    "self")
                if cpu_cgroup_relative_path is None:
                    log_cgroup_warn(
                        "The agent's process is not within a CPU cgroup")
                else:
                    cpu_accounting = self._cgroups_api.get_unit_property(
                        agent_unit_name, "CPUAccounting")
                    log_cgroup_info('CPUAccounting: {0}', cpu_accounting)

                if memory_cgroup_relative_path is None:
                    log_cgroup_warn(
                        "The agent's process is not within a memory cgroup")
                else:
                    memory_accounting = self._cgroups_api.get_unit_property(
                        agent_unit_name, "MemoryAccounting")
                    log_cgroup_info('MemoryAccounting: {0}', memory_accounting)

                #
                # All good, enable cgroups and start monitoring the agent
                #
                self._cgroups_enabled = True

                if cpu_controller_root is None or cpu_cgroup_relative_path is None:
                    logger.info("Will not track CPU for the agent's cgroup")
                else:
                    self._agent_cpu_cgroup_path = os.path.join(
                        cpu_controller_root, cpu_cgroup_relative_path)
                    CGroupsTelemetry.track_cgroup(
                        CpuCgroup(agent_unit_name,
                                  self._agent_cpu_cgroup_path))

                if memory_controller_root is None or memory_cgroup_relative_path is None:
                    logger.info("Will not track memory for the agent's cgroup")
                else:
                    self._agent_memory_cgroup_path = os.path.join(
                        memory_controller_root, memory_cgroup_relative_path)
                    CGroupsTelemetry.track_cgroup(
                        MemoryCgroup(agent_unit_name,
                                     self._agent_memory_cgroup_path))

                log_cgroup_info("Agent cgroups: CPU: {0} -- MEMORY: {1}",
                                self._agent_cpu_cgroup_path,
                                self._agent_memory_cgroup_path)

            except Exception as e:
                message = "Error initializing cgroups: {0}".format(ustr(e))
                logger.warn(message)
                add_event(op=WALAEventOperation.CGroupsInitialize,
                          is_success=False,
                          message=message,
                          log_event=False)
            finally:
                self._initialized = True
Example #28
0
    def test_generate_extension_metrics_telemetry_dictionary(self, *args):
        num_polls = 10
        num_extensions = 1
        num_summarization_values = 7

        cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

        # only verifying calculations and not validity of the values.
        memory_usage_values = [
            random.randint(0, 8 * 1024**3) for _ in range(num_polls)
        ]
        max_memory_usage_values = [
            random.randint(0, 8 * 1024**3) for _ in range(num_polls)
        ]

        # no need to initialize the CPU usage, since we mock get_cpu_usage() below
        with patch(
                "azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"
        ):
            for i in range(num_extensions):
                dummy_cpu_cgroup = CGroup.create(
                    "dummy_cpu_path_{0}".format(i), "cpu",
                    "dummy_extension_{0}".format(i))
                CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

                dummy_memory_cgroup = CGroup.create(
                    "dummy_memory_path_{0}".format(i), "memory",
                    "dummy_extension_{0}".format(i))
                CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

        with patch(
                "azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage"
        ) as patch_get_memory_max_usage:
            with patch(
                    "azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage"
            ) as patch_get_memory_usage:
                with patch(
                        "azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage"
                ) as patch_get_cpu_usage:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active"
                               ) as patch_is_active:
                        for i in range(num_polls):
                            patch_is_active.return_value = True
                            patch_get_cpu_usage.return_value = cpu_percent_values[
                                i]
                            patch_get_memory_usage.return_value = memory_usage_values[
                                i]  # example 200 MB
                            patch_get_memory_max_usage.return_value = max_memory_usage_values[
                                i]  # example 450 MB
                            CGroupsTelemetry.poll_all_tracked()

        performance_metrics = CGroupsTelemetry.report_all_tracked()

        message_json = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0, performance_metrics=performance_metrics)

        for i in range(num_extensions):
            self.assertTrue(
                CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
            self.assertTrue(
                CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

        self.assertIn("SchemaVersion", message_json)
        self.assertIn("PerfMetrics", message_json)

        collected_metrics = message_json["PerfMetrics"]

        for i in range(num_extensions):
            extn_name = "dummy_extension_{0}".format(i)

            self.assertIn("memory", collected_metrics[extn_name])
            self.assertIn("cur_mem", collected_metrics[extn_name]["memory"])
            self.assertIn("max_mem", collected_metrics[extn_name]["memory"])
            self.assertEqual(
                len(collected_metrics[extn_name]["memory"]["cur_mem"]),
                num_summarization_values)
            self.assertEqual(
                len(collected_metrics[extn_name]["memory"]["max_mem"]),
                num_summarization_values)

            self.assertIsInstance(
                collected_metrics[extn_name]["memory"]["cur_mem"][5], str)
            self.assertIsInstance(
                collected_metrics[extn_name]["memory"]["cur_mem"][6], str)
            self.assertIsInstance(
                collected_metrics[extn_name]["memory"]["max_mem"][5], str)
            self.assertIsInstance(
                collected_metrics[extn_name]["memory"]["max_mem"][6], str)

            self.assertIn("cpu", collected_metrics[extn_name])
            self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"])
            self.assertEqual(
                len(collected_metrics[extn_name]["cpu"]["cur_cpu"]),
                num_summarization_values)

            self.assertIsInstance(
                collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str)
            self.assertIsInstance(
                collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str)

        message_json = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0, performance_metrics=None)
        self.assertIn("SchemaVersion", message_json)
        self.assertNotIn("PerfMetrics", message_json)

        message_json = generate_extension_metrics_telemetry_dictionary(
            schema_version=2.0, performance_metrics=None)
        self.assertEqual(message_json, None)

        message_json = generate_extension_metrics_telemetry_dictionary(
            schema_version="z", performance_metrics=None)
        self.assertEqual(message_json, None)