Beispiel #1
0
    def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
        """
        Poll repeatedly while reading memory usage fails and check that only CPU samples accumulate.

        MemoryCgroup.get_memory_usage is patched to raise, CpuCgroup._get_cpu_percent is patched to
        return a constant and CGroup.is_active is patched to return True. After each poll the CPU
        expectation grows by one sample while the memory expectations stay empty. After reporting,
        the per-extension entries are still expected to exist and the metrics helper is called with
        empty expectations.
        """
        extension_count = 5
        cpu_reading = 30

        # One CPU cgroup and one memory cgroup per dummy extension.
        for index in range(extension_count):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

        with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_percent_mock:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_usage_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = True
                    # Every memory read fails, which is how this test models the missing memory cgroup.
                    memory_usage_mock.side_effect = Exception("File not found")
                    cpu_percent_mock.return_value = cpu_reading

                    # Polls are numbered 1..9, so the poll number is also the expected CPU sample count.
                    for polls_so_far in range(1, 10):
                        CGroupsTelemetry.poll_all_tracked()

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                        self._assert_cgroup_metrics_equal(
                            cpu_usage=[cpu_reading] * polls_so_far,
                            memory_usage=[],
                            max_memory_usage=[])

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                    self._assert_cgroup_metrics_equal([], [], [])
Beispiel #2
0
    def test_telemetry_polling_with_changing_cgroups_state(self, *args):
        """
        Poll once while the cgroups are active and once after they become inactive.

        The cgroups must remain tracked after the first poll and be untracked after the second.
        Both polls are expected to contribute one sample per metric, and reporting is expected to
        leave no per-extension metrics behind.
        """
        extension_count = 5

        for index in range(extension_count):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

        def check_tracking(assertion):
            # Applies assertTrue/assertFalse to the tracking state of every dummy cgroup path.
            for index in range(extension_count):
                assertion(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(index)))
                assertion(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(index)))

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_mock:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                        samples_expected = 2  # one sample from each of the two polls below
                        cpu_reading = 30
                        memory_reading = 209715200  # 200 MB
                        max_memory_reading = 471859200  # 450 MB

                        is_active_mock.return_value = True
                        cpu_mock.return_value = cpu_reading
                        memory_mock.return_value = memory_reading
                        max_memory_mock.return_value = max_memory_reading

                        check_tracking(self.assertTrue)

                        # First poll: cgroups are active, so they stay tracked.
                        CGroupsTelemetry.poll_all_tracked()
                        check_tracking(self.assertTrue)
                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)

                        # Second poll: cgroups now report inactive and must be dropped from tracking.
                        is_active_mock.return_value = False
                        CGroupsTelemetry.poll_all_tracked()
                        check_tracking(self.assertFalse)

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                        self._assert_cgroup_metrics_equal(
                            cpu_usage=[cpu_reading] * samples_expected,
                            memory_usage=[memory_reading] * samples_expected,
                            max_memory_usage=[max_memory_reading] * samples_expected)

                        CGroupsTelemetry.report_all_tracked()

                        # Reporting is expected to clear the metrics of the now-untracked extensions.
                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), 0)
                        self._assert_cgroup_metrics_equal([], [], [])
    def test_telemetry_polling_with_inactive_cgroups(self, *args):
        """
        Poll once while every cgroup reports itself as inactive.

        The cgroups are expected to be tracked before the poll and untracked after it, with exactly
        one sample recorded per metric. Reporting is then expected to leave no per-extension metrics.
        """
        extension_count = 5

        for index in range(extension_count):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

        def check_tracking(assertion):
            # Applies assertTrue/assertFalse to the tracking state of every dummy cgroup path.
            for index in range(extension_count):
                assertion(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(index)))
                assertion(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(index)))

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as max_memory_mock:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as memory_mock:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_mock:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                        samples_expected = 1
                        cpu_reading = 30
                        memory_reading = 209715200  # 200 MB
                        max_memory_reading = 471859200  # 450 MB

                        is_active_mock.return_value = False
                        cpu_mock.return_value = cpu_reading
                        memory_mock.return_value = memory_reading
                        max_memory_mock.return_value = max_memory_reading

                        def expect_samples(series, reading):
                            # The series must hold exactly the expected number of identical readings.
                            self.assertEqual(len(series._data), samples_expected)
                            self.assertListEqual(series._data, [reading] * samples_expected)

                        check_tracking(self.assertTrue)
                        CGroupsTelemetry.poll_all_tracked()
                        check_tracking(self.assertFalse)

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)

                        for cgroup_metric in CGroupsTelemetry._cgroup_metrics.values():
                            memory_series, max_memory_series, cpu_series = cgroup_metric.get_metrics()
                            expect_samples(memory_series, memory_reading)
                            expect_samples(max_memory_series, max_memory_reading)
                            expect_samples(cpu_series, cpu_reading)

                        CGroupsTelemetry.report_all_tracked()

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), 0)
                        # With the length assertion above passing, this loop has nothing to iterate;
                        # it is kept so that any remaining entry would still be checked for emptiness.
                        for cgroup_metric in CGroupsTelemetry._cgroup_metrics.values():
                            memory_series, max_memory_series, cpu_series = cgroup_metric.get_metrics()
                            self.assertEqual(len(memory_series._data), 0)
                            self.assertEqual(len(max_memory_series._data), 0)
                            self.assertEqual(len(cpu_series._data), 0)
    def test_telemetry_polling_with_changing_cgroups_state(
            self, patch_get_statm, patch_is_active, patch_get_cpu_usage,
            patch_get_mem, patch_get_max_mem, *args):
        """
        Poll once with active cgroups, then once after they turn inactive and every read fails.

        The mock parameters are supplied by the caller (presumably patch decorators outside this
        view). Only the first poll is expected to contribute samples; after the second poll the
        cgroups must be untracked, and reporting is expected to leave no per-extension metrics.
        """
        extension_count = 5
        samples_expected = 1  # only the first poll succeeds in reading values
        cpu_reading = 30
        memory_reading = 209715200  # 200 MB
        max_memory_reading = 471859200  # 450 MB
        statm_reading = 20000000

        self._track_new_extension_cgroups(extension_count)

        patch_is_active.return_value = True
        patch_get_cpu_usage.return_value = cpu_reading
        patch_get_mem.return_value = memory_reading
        patch_get_max_mem.return_value = max_memory_reading
        patch_get_statm.return_value = statm_reading

        self._assert_cgroups_are_tracked(extension_count)
        CGroupsTelemetry.poll_all_tracked()
        self._assert_cgroups_are_tracked(extension_count)

        # From here on the cgroups report inactive and every metric read raises.
        patch_is_active.return_value = False
        for failing_mock in (patch_get_cpu_usage, patch_get_mem, patch_get_max_mem, patch_get_statm):
            failing_mock.side_effect = raise_ioerror

        CGroupsTelemetry.poll_all_tracked()

        for index in range(extension_count):
            for path_template in ("dummy_cpu_path_{0}", "dummy_memory_path_{0}"):
                self.assertFalse(CGroupsTelemetry.is_tracked(path_template.format(index)))

        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
        self._assert_calculated_resource_metrics_equal(
            cpu_usage=[cpu_reading] * samples_expected,
            memory_usage=[memory_reading] * samples_expected,
            max_memory_usage=[max_memory_reading] * samples_expected,
            proc_ids=TestCGroupsTelemetry.TestProcessIds,
            memory_statm_memory_usage=[statm_reading] * samples_expected)

        CGroupsTelemetry.report_all_tracked()

        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), 0)
        self._assert_calculated_resource_metrics_equal([], [], [], [], [])
    def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
        """
        Poll repeatedly while CPU collection fails and check that only memory samples accumulate.

        CpuCgroup.collect is patched to raise, the two memory readers return constants and
        CGroup.is_active returns True. After reporting, the per-extension entries are still
        expected to exist but every series must be empty.
        """
        extension_count = 5

        for index in range(extension_count):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as max_memory_mock:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as memory_mock:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup.collect") as cpu_collect_mock:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                        memory_reading = 209715200  # 200 MB
                        max_memory_reading = 471859200  # 450 MB

                        is_active_mock.return_value = True
                        # Every CPU collection fails, which is how this test models the missing CPU cgroup.
                        cpu_collect_mock.side_effect = Exception("File not found")
                        memory_mock.return_value = memory_reading
                        max_memory_mock.return_value = max_memory_reading

                        # Polls are numbered 1..9, so the poll number is also the expected sample count.
                        for polls_so_far in range(1, 10):
                            CGroupsTelemetry.poll_all_tracked()
                            self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)

                            for cgroup_metric in CGroupsTelemetry._cgroup_metrics.values():
                                memory_series, max_memory_series, cpu_series = cgroup_metric.get_metrics()

                                self.assertEqual(len(memory_series._data), polls_so_far)
                                self.assertListEqual(memory_series._data, [memory_reading] * polls_so_far)
                                self.assertEqual(len(max_memory_series._data), polls_so_far)
                                self.assertListEqual(max_memory_series._data,
                                                     [max_memory_reading] * polls_so_far)

                                # No CPU sample may ever be recorded.
                                self.assertEqual(len(cpu_series._data), 0)
                                self.assertListEqual(cpu_series._data, [])

                        CGroupsTelemetry.report_all_tracked()

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                        for cgroup_metric in CGroupsTelemetry._cgroup_metrics.values():
                            memory_series, max_memory_series, cpu_series = cgroup_metric.get_metrics()
                            self.assertEqual(len(memory_series._data), 0)
                            self.assertEqual(len(max_memory_series._data), 0)
                            self.assertEqual(len(cpu_series._data), 0)
Beispiel #6
0
    def exercise_telemetry_instantiation(self, test_cgroup):
        """
        Track the given cgroup, generate some load, and validate the reported metrics and limits.

        :param test_cgroup: object exposing ``name`` and a ``cgroups`` container that must include
                            'cpu' and 'memory'; it is handed to CGroupsTelemetry.track_cgroup.
        """
        test_extension_name = test_cgroup.name
        CGroupsTelemetry.track_cgroup(test_cgroup)

        for controller in ('cpu', 'memory'):
            self.assertIn(controller, test_cgroup.cgroups)
        self.assertTrue(CGroupsTelemetry.is_tracked(test_extension_name))

        # Burn some CPU and let a second elapse before collecting the report.
        consume_cpu_time()
        time.sleep(1)
        metrics, limits = CGroupsTelemetry.report_all_tracked()

        # Metric family -> (expected metric name, value the reading must exceed).
        expectations = {
            "Process": ("% Processor Time", 0.0),
            "Memory": ("Total Memory Usage", 100000),
        }

        my_metrics = metrics[test_extension_name]
        self.assertEqual(len(my_metrics), 2)
        for metric_family, metric_name, metric_value in my_metrics:
            if metric_family not in expectations:
                self.fail("Unknown metric {0}/{1} value {2}".format(
                    metric_family, metric_name, metric_value))
            expected_name, lower_bound = expectations[metric_family]
            self.assertEqual(metric_name, expected_name)
            self.assertGreater(metric_value, lower_bound)

        my_limits = limits[test_extension_name]
        self.assertIsInstance(my_limits,
                              CGroupsLimits,
                              msg="is not the correct instance")
        self.assertGreater(my_limits.cpu_limit, 0.0)
        self.assertGreater(my_limits.memory_limit, 0.0)
Beispiel #7
0
    def send_telemetry_metrics(self):
        """
        Report the tracked cgroup metrics as a telemetry event once the reporting period has elapsed.

        The first call only seeds ``last_cgroup_report_telemetry`` with the current UTC time. Later
        calls report when at least CGROUP_TELEMETRY_REPORTING_PERIOD has passed since that timestamp,
        and an event is emitted only when the report is non-empty. Any exception is logged as a
        warning and not propagated.

        The send_telemetry_metrics would soon be removed in favor of sending performance metrics directly.

        :return: None
        """
        time_now = datetime.datetime.utcnow()

        try:  # A reporting failure must not take down the whole monitor thread.
            if not self.last_cgroup_report_telemetry:
                self.last_cgroup_report_telemetry = time_now

            report_due_at = self.last_cgroup_report_telemetry + MonitorHandler.CGROUP_TELEMETRY_REPORTING_PERIOD
            if time_now < report_due_at:
                return

            performance_metrics = CGroupsTelemetry.report_all_tracked()
            # The timestamp only advances after a successful report.
            self.last_cgroup_report_telemetry = time_now

            if not performance_metrics:
                return

            message = generate_extension_metrics_telemetry_dictionary(
                schema_version=1.0,
                performance_metrics=performance_metrics)
            add_event(name=AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ExtensionMetricsData,
                      is_success=True,
                      message=ustr(message),
                      log_event=False)
        except Exception as e:
            logger.warn("Could not report all the tracked telemetry due to {0}", ustr(e))
    def test_telemetry_polling_with_inactive_cgroups(self, *_):
        """
        Poll once and expect every cgroup to be dropped from tracking without producing metrics.

        NOTE(review): nothing in this method marks the cgroups inactive; presumably the fixtures or
        decorators outside this view do so - confirm.
        """
        extension_count = 5

        self._track_new_extension_cgroups(extension_count)
        self._assert_cgroups_are_tracked(extension_count)

        polled_metrics = CGroupsTelemetry.poll_all_tracked()

        for index in range(extension_count):
            for path_template in ("dummy_cpu_path_{0}", "dummy_memory_path_{0}"):
                self.assertFalse(CGroupsTelemetry.is_tracked(path_template.format(index)))

        # The per-extension entries still exist after the poll, but nothing was recorded in them.
        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
        self._assert_calculated_resource_metrics_equal([], [], [], [], proc_ids=None)
        self.assertEqual(len(polled_metrics), 0)

        collected_metrics = CGroupsTelemetry.report_all_tracked()
        self._assert_extension_metrics_data(collected_metrics,
                                            extension_count, [], [], [], [],
                                            is_cpu_present=False,
                                            is_memory_present=False)

        # Reporting is expected to remove the entries of the untracked extensions.
        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), 0)
        self._assert_calculated_resource_metrics_equal([], [], [], [], [])
Beispiel #9
0
    def test_telemetry_calculations(self, *args):
        """
        Feed ten rounds of random CPU and memory readings through polling and check the report.

        Only the calculations are verified, not whether the random values are realistic. Each
        summary is expected to hold seven values, of which the first five are compared against
        generate_metric_list() applied to the readings that were fed in.
        """
        poll_count = 10
        extension_count = 1
        summary_length = 7
        memory_ceiling = 8 * 1024 ** 3

        cpu_readings = [random.randint(0, 100) for _ in range(poll_count)]
        memory_readings = [random.randint(0, memory_ceiling) for _ in range(poll_count)]
        max_memory_readings = [random.randint(0, memory_ceiling) for _ in range(poll_count)]

        for index in range(extension_count):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

        # Two cgroups (cpu + memory) are tracked per extension.
        self.assertEqual(2 * extension_count, len(CGroupsTelemetry._tracked))

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_mock:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                        readings = zip(cpu_readings, memory_readings, max_memory_readings)
                        for cpu_reading, memory_reading, max_memory_reading in readings:
                            is_active_mock.return_value = True
                            cpu_mock.return_value = cpu_reading
                            memory_mock.return_value = memory_reading
                            max_memory_mock.return_value = max_memory_reading
                            CGroupsTelemetry.poll_all_tracked()

        collected_metrics = CGroupsTelemetry.report_all_tracked()
        for index in range(extension_count):
            name = "dummy_extension_{0}".format(index)

            self.assertIn(name, collected_metrics)
            extension_summary = collected_metrics[name]

            self.assertIn("memory", extension_summary)
            memory_summary = extension_summary["memory"]
            self.assertIn("cur_mem", memory_summary)
            self.assertIn("max_mem", memory_summary)
            self.assertEqual(summary_length, len(memory_summary["cur_mem"]))
            self.assertEqual(summary_length, len(memory_summary["max_mem"]))

            self.assertListEqual(generate_metric_list(memory_readings), memory_summary["cur_mem"][:5])
            self.assertListEqual(generate_metric_list(max_memory_readings), memory_summary["max_mem"][:5])

            self.assertIn("cpu", extension_summary)
            cpu_summary = extension_summary["cpu"]
            self.assertIn("cur_cpu", cpu_summary)
            self.assertEqual(summary_length, len(cpu_summary["cur_cpu"]))
            self.assertListEqual(generate_metric_list(cpu_readings), cpu_summary["cur_cpu"][:5])
    def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
        """
        Poll ten times with only the memory readers patched and expect memory-only metrics.

        After every poll the calculated metrics must contain no CPU samples and one more memory
        sample than before, and the poll must return five metrics per extension. After reporting,
        the per-extension entries are expected to remain while the metrics helper is called with
        empty expectations.
        """
        extension_count = 5

        self._track_new_extension_cgroups(extension_count)

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    poll_total = 10
                    memory_reading = 209715200  # 200 MB
                    max_memory_reading = 471859200  # 450 MB

                    is_active_mock.return_value = True
                    memory_mock.return_value = memory_reading
                    max_memory_mock.return_value = max_memory_reading

                    for polls_so_far in range(1, poll_total + 1):
                        polled_metrics = CGroupsTelemetry.poll_all_tracked()

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                        self._assert_calculated_resource_metrics_equal(
                            cpu_usage=[],
                            memory_usage=[memory_reading] * polls_so_far,
                            max_memory_usage=[max_memory_reading] * polls_so_far,
                            memory_statm_memory_usage=[TestCGroupsTelemetry.TestProcStatmMemoryUsed] * polls_so_far,
                            proc_ids=TestCGroupsTelemetry.TestProcessIds)

                        # Memory is only populated, CPU is not. Thus 5 metrics per cgroup.
                        self.assertEqual(len(polled_metrics), extension_count * 5)
                        self._assert_polled_metrics_equal(
                            polled_metrics, 0, memory_reading, max_memory_reading,
                            TestCGroupsTelemetry.TestProcStatmMemoryUsed)

                    collected_metrics = CGroupsTelemetry.report_all_tracked()
                    self._assert_extension_metrics_data(
                        collected_metrics,
                        extension_count,
                        [],
                        [TestCGroupsTelemetry.TestProcStatmMemoryUsed] * poll_total,
                        [memory_reading] * poll_total,
                        [max_memory_reading] * poll_total,
                        is_cpu_present=False)

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                    self._assert_calculated_resource_metrics_equal([], [], [], [], [])
    def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
        """
        Poll nine times with only the CPU reader patched and expect CPU-only metrics.

        After every poll the calculated metrics must contain one more CPU sample than before and
        no memory data, and the poll must return one metric per extension. After reporting, the
        per-extension entries are expected to remain while the metrics helper is called with empty
        expectations.

        NOTE(review): the memory readers are not patched here; presumably they fail on the dummy
        paths, which yields the empty memory data - confirm.
        """
        extension_count = 5
        self._track_new_extension_cgroups(extension_count)

        with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_usage_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                cpu_reading = 30

                is_active_mock.return_value = True
                cpu_usage_mock.return_value = cpu_reading

                # Polls are numbered 1..9, so the poll number is also the expected CPU sample count.
                for polls_so_far in range(1, 10):
                    polled_metrics = CGroupsTelemetry.poll_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                    self._assert_calculated_resource_metrics_equal(
                        cpu_usage=[cpu_reading] * polls_so_far,
                        memory_usage=[],
                        max_memory_usage=[],
                        proc_ids=[],
                        memory_statm_memory_usage=[])

                    # Only CPU populated: a single metric per extension.
                    self.assertEqual(len(polled_metrics), extension_count)
                    self._assert_polled_metrics_equal(polled_metrics, cpu_reading, 0, 0, 0)

                CGroupsTelemetry.report_all_tracked()

                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                self._assert_calculated_resource_metrics_equal([], [], [], [], [])
Beispiel #12
0
    def send_telemetry_metrics(self):
        """
        Report all tracked cgroup metrics and, when the report is non-empty, emit it as an event.

        The send_telemetry_metrics would soon be removed in favor of sending performance metrics directly.
        """
        performance_metrics = CGroupsTelemetry.report_all_tracked()

        # Nothing collected: do not emit an empty event.
        if not performance_metrics:
            return

        message = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0,
            performance_metrics=performance_metrics)
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ExtensionMetricsData,
                  is_success=True,
                  message=ustr(message),
                  log_event=False)
    def test_telemetry_calculations(self, patch_get_statm, patch_is_active,
                                    patch_get_cpu_usage,
                                    patch_get_memory_usage,
                                    patch_get_memory_max_usage, *args):
        """
        Feed ten rounds of random readings through polling and check the polled and reported metrics.

        The mock parameters are supplied by the caller (presumably patch decorators outside this
        view). Only the calculations are verified, not whether the random values are realistic.
        """
        poll_count = 10
        extension_count = 1
        memory_ceiling = 8 * 1024**3

        cpu_readings = [random.randint(0, 100) for _ in range(poll_count)]
        memory_readings = [random.randint(0, memory_ceiling) for _ in range(poll_count)]
        max_memory_readings = [random.randint(0, memory_ceiling) for _ in range(poll_count)]
        statm_readings = [random.randint(0, memory_ceiling) for _ in range(poll_count)]

        self._track_new_extension_cgroups(extension_count)
        # Two cgroups (cpu + memory) are tracked per extension.
        self.assertEqual(2 * extension_count, len(CGroupsTelemetry._tracked))

        rounds = zip(cpu_readings, memory_readings, max_memory_readings, statm_readings)
        for cpu_reading, memory_reading, max_memory_reading, statm_reading in rounds:
            patch_is_active.return_value = True
            patch_get_cpu_usage.return_value = cpu_reading
            patch_get_memory_usage.return_value = memory_reading
            patch_get_memory_max_usage.return_value = max_memory_reading
            patch_get_statm.return_value = statm_reading

            polled_metrics = CGroupsTelemetry.poll_all_tracked()

            # 1 CPU metric + 1 Current Memory + 1 Max memory + num_processes (3) * memory from statm
            self.assertEqual(len(polled_metrics), 6 * extension_count)
            self._assert_polled_metrics_equal(polled_metrics, cpu_reading, memory_reading,
                                              max_memory_reading, statm_reading)

        collected_metrics = CGroupsTelemetry.report_all_tracked()
        self._assert_extension_metrics_data(collected_metrics, extension_count,
                                            cpu_readings,
                                            statm_readings,
                                            memory_readings,
                                            max_memory_readings)
    def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args):
        """
        Check that neither polling nor reporting yields metrics when metric processing returns nothing.

        CGroupsTelemetry._process_cgroup_metric is patched to return an empty dict and
        CGroup.is_active is patched to return False; every poll and the final report must be empty.
        """
        extension_count = 5
        self._track_new_extension_cgroups(extension_count)

        with patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric") as \
                process_metric_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                is_active_mock.return_value = False
                process_metric_mock.return_value = {}

                # Nine polls; the counter itself is not needed.
                for _ in range(1, 10):
                    polled_metrics = CGroupsTelemetry.poll_all_tracked()
                    self.assertEqual(0, len(polled_metrics))

                collected_metrics = CGroupsTelemetry.report_all_tracked()
                self.assertEqual(0, len(collected_metrics))
    def send_telemetry_metrics(self):
        """
        Report the tracked cgroup metrics as a telemetry event once the reporting period has elapsed.

        The first call only seeds ``last_cgroup_report_telemetry`` with the current UTC time. Later
        calls report when at least CGROUP_TELEMETRY_REPORTING_PERIOD has passed since that timestamp,
        and an event is emitted only when the report is non-empty. Exceptions are not caught here.
        """
        time_now = datetime.datetime.utcnow()

        if not self.last_cgroup_report_telemetry:
            self.last_cgroup_report_telemetry = time_now

        report_due_at = self.last_cgroup_report_telemetry + MonitorHandler.CGROUP_TELEMETRY_REPORTING_PERIOD
        if time_now < report_due_at:
            return

        performance_metrics = CGroupsTelemetry.report_all_tracked()
        # The timestamp only advances after report_all_tracked() has returned.
        self.last_cgroup_report_telemetry = time_now

        if not performance_metrics:
            return

        message = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0,
            performance_metrics=performance_metrics)
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ExtensionMetricsData,
                  is_success=True,
                  message=ustr(message),
                  log_event=False)
Beispiel #16
0
    def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args):
        """
        Check that the report is empty when metric processing returns nothing.

        CGroupsTelemetry._process_cgroup_metric is patched to return an empty dict and
        CGroup.is_active is patched to return False; after nine polls the report must be empty.
        """
        extension_count = 5

        for index in range(extension_count):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

        with patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric") as \
                process_metric_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                is_active_mock.return_value = False
                process_metric_mock.return_value = {}

                # Nine polls; the counter itself is not needed.
                for _ in range(1, 10):
                    CGroupsTelemetry.poll_all_tracked()

                collected_metrics = CGroupsTelemetry.report_all_tracked()
                self.assertEqual(0, len(collected_metrics))
Beispiel #17
0
    def test_generate_extension_metrics_telemetry_dictionary(self, *args):
        """Check the shape of the dictionary built from reported cgroup metrics.

        Polls mocked cpu/memory cgroups with random values, reports them and
        feeds the result to ``generate_extension_metrics_telemetry_dictionary``.
        The test then verifies the keys present, the number of summarization
        values per metric and that the entries at index 5 and 6 are strings.
        Finally it checks the output for ``performance_metrics=None`` with a
        supported, an unsupported and a non-numeric schema version.
        """
        num_polls = 10
        num_extensions = 1
        num_summarization_values = 7
        memory_upper_bound = 8 * 1024**3

        # Only the calculations are verified, not the validity of the values.
        cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]
        memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]
        max_memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]

        # No need to initialize the CPU usage, since get_cpu_usage() is mocked below.
        with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
            for index in range(num_extensions):
                extension = "dummy_extension_{0}".format(index)

                cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension)
                CGroupsTelemetry.track_cgroup(cpu_cgroup)

                memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension)
                CGroupsTelemetry.track_cgroup(memory_cgroup)

        self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

        samples = zip(cpu_percent_values, memory_usage_values, max_memory_usage_values)

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as mock_max_memory:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as mock_memory:
                with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as mock_cpu:
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as mock_is_active:
                        mock_is_active.return_value = True

                        # One poll per (cpu, memory, max memory) sample.
                        for cpu_value, memory_value, max_memory_value in samples:
                            mock_cpu.return_value = cpu_value
                            mock_memory.return_value = memory_value
                            mock_max_memory.return_value = max_memory_value
                            CGroupsTelemetry.poll_all_tracked()

        performance_metrics = CGroupsTelemetry.report_all_tracked()

        message_json = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0, performance_metrics=performance_metrics)

        # Reporting must not stop the cgroups from being tracked.
        for index in range(num_extensions):
            for path_template in ("dummy_cpu_path_{0}", "dummy_memory_path_{0}"):
                self.assertTrue(CGroupsTelemetry.is_tracked(path_template.format(index)))

        for top_level_key in ("SchemaVersion", "PerfMetrics"):
            self.assertIn(top_level_key, message_json)

        collected_metrics = message_json["PerfMetrics"]
        memory_keys = ("cur_mem", "max_mem")
        string_positions = (5, 6)

        for index in range(num_extensions):
            extension_metrics = collected_metrics["dummy_extension_{0}".format(index)]

            self.assertIn("memory", extension_metrics)
            memory_metrics = extension_metrics["memory"]
            for key in memory_keys:
                self.assertIn(key, memory_metrics)
            for key in memory_keys:
                self.assertEqual(len(memory_metrics[key]), num_summarization_values)
            for key in memory_keys:
                for position in string_positions:
                    self.assertIsInstance(memory_metrics[key][position], str)

            self.assertIn("cpu", extension_metrics)
            cpu_metrics = extension_metrics["cpu"]
            self.assertIn("cur_cpu", cpu_metrics)
            self.assertEqual(len(cpu_metrics["cur_cpu"]), num_summarization_values)
            for position in string_positions:
                self.assertIsInstance(cpu_metrics["cur_cpu"][position], str)

        # Supported schema version without metrics: header only.
        header_only = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0, performance_metrics=None)
        self.assertIn("SchemaVersion", header_only)
        self.assertNotIn("PerfMetrics", header_only)

        # Any other schema version is expected to produce None.
        for other_version in (2.0, "z"):
            result = generate_extension_metrics_telemetry_dictionary(
                schema_version=other_version, performance_metrics=None)
            self.assertEqual(result, None)
Beispiel #18
0
    def test_telemetry_with_tracked_cgroup(self):
        """End-to-end check that a launched extension command is tracked and produces metrics.

        Requires root. With ``CGroupsApi._is_systemd`` patched to return False
        and the ``CGroupConfigurator`` singleton reset (and restored in the
        ``finally`` clause), the test launches a background script through an
        ``ExtHandlerInstance``, polls the tracked cgroups, and verifies the
        structure and basic value ranges of the reported metrics.
        """
        self.assertTrue(i_am_root(), "Test does not run when non-root")

        # This test has some timing issues when systemd is managing cgroups, so we force the file system API
        # by creating a new instance of the CGroupConfigurator
        with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False):
            # Remember the current singleton so it can be put back in the finally clause.
            cgroup_configurator_instance = CGroupConfigurator._instance
            CGroupConfigurator._instance = None

            try:
                max_num_polls = 30
                time_to_wait = 3
                extn_name = "foobar-1.0.0"
                num_summarization_values = 7

                # Two cgroups are expected for the extension (presumably cpu and memory - confirm in make_new_cgroup).
                cgs = make_new_cgroup(extn_name)
                self.assertEqual(len(cgs), 2)

                ext_handler_properties = ExtHandlerProperties()
                ext_handler_properties.version = "1.0.0"
                self.ext_handler = ExtHandler(name='foobar')
                self.ext_handler.properties = ext_handler_properties
                self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None)

                # Background script: five iterations, each allocating a list and sleeping time_to_wait seconds.
                command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", '''
nohup python -c "import time

for i in range(5):
    x = [1, 2, 3, 4, 5] * (i * 1000)
    time.sleep({0})
    x *= 0
    print('Test loop')" &
'''.format(time_to_wait))

                self.log_dir = os.path.join(self.tmp_dir, "log")

                # Redirect the handler's base and log directories to the test's temporary locations.
                with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \
                        patch_get_base_dir:
                    with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \
                            patch_get_log_dir:
                        self.ext_handler_instance.launch_command(command)

                #
                # If the test is made to run using the systemd API, then the paths of the cgroups need to be checked differently:
                #
                #     self.assertEquals(len(CGroupsTelemetry._tracked), 2)
                #     cpu = os.path.join(BASE_CGROUPS, "cpu", "system.slice", r"foobar_1.0.0_.*\.scope")
                #     self.assertTrue(any(re.match(cpu, tracked.path) for tracked in CGroupsTelemetry._tracked))
                #     memory = os.path.join(BASE_CGROUPS, "memory", "system.slice", r"foobar_1.0.0_.*\.scope")
                #     self.assertTrue(any(re.match(memory, tracked.path) for tracked in CGroupsTelemetry._tracked))
                #
                self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
                    BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0")))
                self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
                    BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0")))

                # Poll max_num_polls times, half a second apart, while the script runs.
                for i in range(max_num_polls):
                    CGroupsTelemetry.poll_all_tracked()
                    time.sleep(0.5)

                collected_metrics = CGroupsTelemetry.report_all_tracked()

                # Each metric must carry num_summarization_values entries; the entries at index 5 and 6 must be strings.
                self.assertIn("memory", collected_metrics[extn_name])
                self.assertIn("cur_mem", collected_metrics[extn_name]["memory"])
                self.assertIn("max_mem", collected_metrics[extn_name]["memory"])
                self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values)
                self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values)

                self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str)
                self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str)
                self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str)
                self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str)

                self.assertIn("cpu", collected_metrics[extn_name])
                self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"])
                self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values)

                self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str)
                self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str)

                # The first five entries are compared against zero.
                for i in range(5):
                    self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0)
                    self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0)
                    self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0)
                    # Equal because CPU could be zero for minimum value.
            finally:
                # Restore the singleton that was active before the test.
                CGroupConfigurator._instance = cgroup_configurator_instance
    def test_telemetry_with_tracked_cgroup(self, *_):
        self.assertTrue(i_am_root(), "Test does not run when non-root")
        CGroupConfigurator._instance = None

        max_num_polls = 30
        time_to_wait = 3
        extn_name = "foobar-1.0.0"
        num_summarization_values = 7

        cgs = make_new_cgroup(extn_name)
        self.assertEqual(len(cgs), 2)

        ext_handler_properties = ExtHandlerProperties()
        ext_handler_properties.version = "1.0.0"
        self.ext_handler = ExtHandler(name='foobar')
        self.ext_handler.properties = ext_handler_properties
        self.ext_handler_instance = ExtHandlerInstance(
            ext_handler=self.ext_handler, protocol=None)

        command = self.create_script(
            "keep_cpu_busy_and_consume_memory_for_5_seconds", '''
nohup python -c "import time

for i in range(5):
    x = [1, 2, 3, 4, 5] * (i * 1000)
    time.sleep({0})
    x *= 0
    print('Test loop')" &
'''.format(time_to_wait))

        self.log_dir = os.path.join(self.tmp_dir, "log")

        with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \
                patch_get_base_dir:
            with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \
                    patch_get_log_dir:
                self.ext_handler_instance.launch_command(command)

        self.assertTrue(
            CGroupsTelemetry.is_tracked(
                os.path.join(BASE_CGROUPS, "cpu", "walinuxagent.extensions",
                             "foobar_1.0.0")))
        self.assertTrue(
            CGroupsTelemetry.is_tracked(
                os.path.join(BASE_CGROUPS, "memory", "walinuxagent.extensions",
                             "foobar_1.0.0")))

        for i in range(max_num_polls):
            CGroupsTelemetry.poll_all_tracked()
            time.sleep(0.5)

        collected_metrics = CGroupsTelemetry.report_all_tracked()

        self.assertIn("memory", collected_metrics[extn_name])
        self.assertIn("cur_mem", collected_metrics[extn_name]["memory"])
        self.assertIn("max_mem", collected_metrics[extn_name]["memory"])
        self.assertEqual(
            len(collected_metrics[extn_name]["memory"]["cur_mem"]),
            num_summarization_values)
        self.assertEqual(
            len(collected_metrics[extn_name]["memory"]["max_mem"]),
            num_summarization_values)

        self.assertIsInstance(
            collected_metrics[extn_name]["memory"]["cur_mem"][5], str)
        self.assertIsInstance(
            collected_metrics[extn_name]["memory"]["cur_mem"][6], str)
        self.assertIsInstance(
            collected_metrics[extn_name]["memory"]["max_mem"][5], str)
        self.assertIsInstance(
            collected_metrics[extn_name]["memory"]["max_mem"][6], str)

        self.assertIn("cpu", collected_metrics[extn_name])
        self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"])
        self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]),
                         num_summarization_values)

        self.assertIsInstance(
            collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str)
        self.assertIsInstance(
            collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str)

        for i in range(5):
            self.assertGreater(
                collected_metrics[extn_name]["memory"]["cur_mem"][i], 0)
            self.assertGreater(
                collected_metrics[extn_name]["memory"]["max_mem"][i], 0)
            self.assertGreaterEqual(
                collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0)
    def test_telemetry_polling_with_active_cgroups(self, *args):
        """Poll active cgroups repeatedly and check both the polled and the reported metrics.

        Tracks cgroups for three extensions, mocks the cpu/memory readers with
        fixed values and polls ten times. After every poll the accumulated
        per-extension metrics and the metrics returned by the poll are
        verified; after reporting, the accumulated metrics must be empty while
        the extensions remain registered.
        """
        num_extensions = 3

        self._track_new_extension_cgroups(num_extensions)

        num_polls = 10
        current_cpu = 30
        current_memory = 209715200  # example 200 MB
        current_max_memory = 471859200  # example 450 MB
        current_proc_statm = TestCGroupsTelemetry.TestProcStatmMemoryUsed

        # 1 CPU metric + 1 current memory + 1 max memory + num_processes * memory from statm
        num_of_metrics_per_extn_expected = 1 + 1 + 1 + 3 * 1
        expected_metric_count = num_extensions * num_of_metrics_per_extn_expected

        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage",
                   return_value=current_max_memory):
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage",
                       return_value=current_memory):
                with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage",
                           return_value=current_cpu):
                    with patch("azurelinuxagent.common.cgroup.CGroup.is_active",
                               return_value=True):
                        for polls_so_far in range(1, num_polls + 1):
                            polled_metrics = CGroupsTelemetry.poll_all_tracked()

                            # Every poll appends one more sample per tracked resource.
                            self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                            self._assert_calculated_resource_metrics_equal(
                                cpu_usage=[current_cpu] * polls_so_far,
                                memory_usage=[current_memory] * polls_so_far,
                                max_memory_usage=[current_max_memory] * polls_so_far,
                                proc_ids=TestCGroupsTelemetry.TestProcessIds,
                                memory_statm_memory_usage=[current_proc_statm] * polls_so_far)

                            self.assertEqual(len(polled_metrics), expected_metric_count)
                            self._assert_polled_metrics_equal(
                                polled_metrics, current_cpu, current_memory,
                                current_max_memory, current_proc_statm)

        collected_metrics = CGroupsTelemetry.report_all_tracked()

        expected_cpu = [current_cpu] * num_polls
        expected_statm = [TestCGroupsTelemetry.TestProcStatmMemoryUsed] * num_polls
        expected_memory = [current_memory] * num_polls
        expected_max_memory = [current_max_memory] * num_polls
        self._assert_extension_metrics_data(
            collected_metrics, num_extensions, expected_cpu, expected_statm,
            expected_memory, expected_max_memory, is_cpu_present=False)

        # After reporting, the extensions stay registered but their samples are empty.
        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
        self._assert_calculated_resource_metrics_equal([], [], [], [], [])