def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
    """
    Poll the tracked cgroups while MemoryCgroup.get_memory_usage raises.

    After every poll the metrics are compared against one CPU sample per poll
    and empty memory lists; after report_all_tracked() they are compared
    against empty lists.
    """
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)

        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)

        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                current_cpu = 30

                patch_is_active.return_value = True
                # Every attempt to read the memory usage fails.
                patch_get_memory_usage.side_effect = Exception("File not found")
                patch_get_cpu_percent.return_value = current_cpu

                first_poll, poll_limit = 1, 10

                for polls_done in range(first_poll, poll_limit):
                    CGroupsTelemetry.poll_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                    self._assert_cgroup_metrics_equal(
                        cpu_usage=[current_cpu] * polls_done,
                        memory_usage=[],
                        max_memory_usage=[])

                CGroupsTelemetry.report_all_tracked()

                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                self._assert_cgroup_metrics_equal([], [], [])
def test_telemetry_polling_with_changing_cgroups_state(self, *args):
    """
    Poll once with CGroup.is_active returning True and once with it returning False.

    The dummy cgroup paths are expected to be tracked before and after the
    first poll and untracked after the second one. The metrics entries are
    expected to hold two samples each until report_all_tracked() runs, after
    which no entries are expected to remain.
    """
    num_extensions = 5
    tracked_paths = []

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        cpu_path = "dummy_cpu_path_{0}".format(index)
        memory_path = "dummy_memory_path_{0}".format(index)

        CGroupsTelemetry.track_cgroup(CGroup.create(cpu_path, "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(CGroup.create(memory_path, "memory", extension_name))

        tracked_paths.append(cpu_path)
        tracked_paths.append(memory_path)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    no_extensions_expected = 0
                    expected_data_count = 2

                    current_cpu = 30
                    current_memory = 209715200  # example 200 MB
                    current_max_memory = 471859200  # example 450 MB

                    patch_is_active.return_value = True
                    patch_get_cpu_percent.return_value = current_cpu
                    patch_get_memory_usage.return_value = current_memory
                    patch_get_memory_max_usage.return_value = current_max_memory

                    for path in tracked_paths:
                        self.assertTrue(CGroupsTelemetry.is_tracked(path))

                    CGroupsTelemetry.poll_all_tracked()

                    for path in tracked_paths:
                        self.assertTrue(CGroupsTelemetry.is_tracked(path))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                    # From here on every cgroup reports itself as inactive.
                    patch_is_active.return_value = False
                    CGroupsTelemetry.poll_all_tracked()

                    for path in tracked_paths:
                        self.assertFalse(CGroupsTelemetry.is_tracked(path))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                    self._assert_cgroup_metrics_equal(
                        cpu_usage=[current_cpu] * expected_data_count,
                        memory_usage=[current_memory] * expected_data_count,
                        max_memory_usage=[current_max_memory] * expected_data_count)

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), no_extensions_expected)
                    self._assert_cgroup_metrics_equal([], [], [])
def test_telemetry_polling_with_inactive_cgroups(self, *args):
    """
    Poll once while CGroup.is_active returns False.

    The dummy cgroup paths are expected to be tracked before the poll and
    untracked after it, with one sample of each metric recorded per
    extension. After report_all_tracked() no metrics entries are expected
    to remain.
    """
    num_extensions = 5
    tracked_paths = []

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        cpu_path = "dummy_cpu_path_{0}".format(index)
        memory_path = "dummy_memory_path_{0}".format(index)

        CGroupsTelemetry.track_cgroup(CGroup.create(cpu_path, "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(CGroup.create(memory_path, "memory", extension_name))

        tracked_paths.append(cpu_path)
        tracked_paths.append(memory_path)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as patch_get_memory_max_usage:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    no_extensions_expected = 0
                    data_count = 1

                    current_cpu = 30
                    current_memory = 209715200  # example 200 MB
                    current_max_memory = 471859200  # example 450 MB

                    patch_is_active.return_value = False
                    patch_get_cpu_percent.return_value = current_cpu
                    patch_get_memory_usage.return_value = current_memory
                    patch_get_memory_max_usage.return_value = current_max_memory

                    for path in tracked_paths:
                        self.assertTrue(CGroupsTelemetry.is_tracked(path))

                    CGroupsTelemetry.poll_all_tracked()

                    for path in tracked_paths:
                        self.assertFalse(CGroupsTelemetry.is_tracked(path))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                    for extension_metrics in CGroupsTelemetry._cgroup_metrics.values():
                        memory_usage, max_memory_usage, cpu_usage = extension_metrics.get_metrics()

                        self.assertEqual(len(memory_usage._data), data_count)
                        self.assertListEqual(memory_usage._data, [current_memory] * data_count)
                        self.assertEqual(len(max_memory_usage._data), data_count)
                        self.assertListEqual(max_memory_usage._data, [current_max_memory] * data_count)
                        self.assertEqual(len(cpu_usage._data), data_count)
                        self.assertListEqual(cpu_usage._data, [current_cpu] * data_count)

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), no_extensions_expected)

                    # Any entry still present must at least be empty.
                    for extension_metrics in CGroupsTelemetry._cgroup_metrics.values():
                        memory_usage, max_memory_usage, cpu_usage = extension_metrics.get_metrics()

                        self.assertEqual(len(memory_usage._data), 0)
                        self.assertEqual(len(max_memory_usage._data), 0)
                        self.assertEqual(len(cpu_usage._data), 0)
def test_telemetry_polling_with_changing_cgroups_state(
        self, patch_get_statm, patch_is_active, patch_get_cpu_usage, patch_get_mem, patch_get_max_mem, *args):
    """
    Poll once with working, active cgroups and once after they become inactive.

    For the second poll is_active returns False and every metric reader has
    raise_ioerror as its side effect. The dummy paths are expected to be
    untracked afterwards, with the single sample from the first poll still
    held until report_all_tracked() runs.
    """
    num_extensions = 5
    no_extensions_expected = 0
    expected_data_count = 1

    current_cpu = 30
    current_memory = 209715200  # example 200 MB
    current_max_memory = 471859200  # example 450 MB
    current_proc_statm = 20000000

    self._track_new_extension_cgroups(num_extensions)

    patch_is_active.return_value = True
    patch_get_cpu_usage.return_value = current_cpu
    patch_get_mem.return_value = current_memory
    patch_get_max_mem.return_value = current_max_memory
    patch_get_statm.return_value = current_proc_statm

    self._assert_cgroups_are_tracked(num_extensions)
    CGroupsTelemetry.poll_all_tracked()
    self._assert_cgroups_are_tracked(num_extensions)

    # Second poll: the cgroups are inactive and every read fails.
    patch_is_active.return_value = False
    patch_get_cpu_usage.side_effect = raise_ioerror
    patch_get_mem.side_effect = raise_ioerror
    patch_get_max_mem.side_effect = raise_ioerror
    patch_get_statm.side_effect = raise_ioerror

    CGroupsTelemetry.poll_all_tracked()

    for index in range(num_extensions):
        cpu_path = "dummy_cpu_path_{0}".format(index)
        memory_path = "dummy_memory_path_{0}".format(index)
        self.assertFalse(CGroupsTelemetry.is_tracked(cpu_path))
        self.assertFalse(CGroupsTelemetry.is_tracked(memory_path))

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
    self._assert_calculated_resource_metrics_equal(
        cpu_usage=[current_cpu] * expected_data_count,
        memory_usage=[current_memory] * expected_data_count,
        max_memory_usage=[current_max_memory] * expected_data_count,
        proc_ids=TestCGroupsTelemetry.TestProcessIds,
        memory_statm_memory_usage=[current_proc_statm] * expected_data_count)

    CGroupsTelemetry.report_all_tracked()

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), no_extensions_expected)
    self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
    """
    Poll the tracked cgroups while CpuCgroup.collect raises.

    After every poll each extension is expected to hold one memory and one
    max-memory sample per poll and no CPU samples; after
    report_all_tracked() all three series are expected to be empty.
    """
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)

        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)

        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as patch_get_memory_max_usage:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.collect") as patch_cpu_cgroup_collect:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    current_memory = 209715200  # example 200 MB
                    current_max_memory = 471859200  # example 450 MB

                    patch_is_active.return_value = True
                    # Collecting CPU data fails on every call.
                    patch_cpu_cgroup_collect.side_effect = Exception("File not found")
                    patch_get_memory_usage.return_value = current_memory
                    patch_get_memory_max_usage.return_value = current_max_memory

                    first_poll, poll_limit = 1, 10

                    for polls_done in range(first_poll, poll_limit):
                        CGroupsTelemetry.poll_all_tracked()

                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                        for extension_metrics in CGroupsTelemetry._cgroup_metrics.values():
                            memory_usage, max_memory_usage, cpu_usage = extension_metrics.get_metrics()

                            self.assertEqual(len(memory_usage._data), polls_done)
                            self.assertListEqual(memory_usage._data, [current_memory] * polls_done)
                            self.assertEqual(len(max_memory_usage._data), polls_done)
                            self.assertListEqual(max_memory_usage._data, [current_max_memory] * polls_done)
                            self.assertEqual(len(cpu_usage._data), 0)
                            self.assertListEqual(cpu_usage._data, [])

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                    for extension_metrics in CGroupsTelemetry._cgroup_metrics.values():
                        memory_usage, max_memory_usage, cpu_usage = extension_metrics.get_metrics()

                        self.assertEqual(len(memory_usage._data), 0)
                        self.assertEqual(len(max_memory_usage._data), 0)
                        self.assertEqual(len(cpu_usage._data), 0)
def exercise_telemetry_instantiation(self, test_cgroup):
    """
    Track ``test_cgroup``, generate some load and validate what is reported for it.

    :param test_cgroup: object exposing ``name`` and ``cgroups``; it is handed to
        CGroupsTelemetry.track_cgroup() and looked up by name in the report.
    """
    test_extension_name = test_cgroup.name
    CGroupsTelemetry.track_cgroup(test_cgroup)

    self.assertIn('cpu', test_cgroup.cgroups)
    self.assertIn('memory', test_cgroup.cgroups)
    self.assertTrue(CGroupsTelemetry.is_tracked(test_extension_name))

    consume_cpu_time()
    time.sleep(1)

    metrics, limits = CGroupsTelemetry.report_all_tracked()

    my_metrics = metrics[test_extension_name]
    self.assertEqual(len(my_metrics), 2)

    # metric family -> (expected metric name, value the metric must exceed)
    expectations = {
        "Process": ("% Processor Time", 0.0),
        "Memory": ("Total Memory Usage", 100000),
    }
    for metric_family, metric_name, metric_value in my_metrics:
        if metric_family not in expectations:
            self.fail("Unknown metric {0}/{1} value {2}".format(metric_family, metric_name, metric_value))
        expected_name, lower_bound = expectations[metric_family]
        self.assertEqual(metric_name, expected_name)
        self.assertGreater(metric_value, lower_bound)

    my_limits = limits[test_extension_name]
    self.assertIsInstance(my_limits, CGroupsLimits, msg="is not the correct instance")
    self.assertGreater(my_limits.cpu_limit, 0.0)
    self.assertGreater(my_limits.memory_limit, 0.0)
def send_telemetry_metrics(self):
    """
    Send the tracked cgroup metrics as an ExtensionMetricsData event.

    Metrics are collected only when at least
    MonitorHandler.CGROUP_TELEMETRY_REPORTING_PERIOD has elapsed since
    ``self.last_cgroup_report_telemetry``, and an event is added only when the
    collected metrics are non-empty. Any exception raised along the way is
    logged as a warning instead of being propagated.

    The send_telemetry_metrics would soon be removed in favor of sending performance metrics directly.

    :return: None
    """
    time_now = datetime.datetime.utcnow()

    # If there is an issue in reporting, it should not take down whole monitor thread.
    try:
        if not self.last_cgroup_report_telemetry:
            self.last_cgroup_report_telemetry = time_now

        next_report_time = self.last_cgroup_report_telemetry + MonitorHandler.CGROUP_TELEMETRY_REPORTING_PERIOD
        if time_now < next_report_time:
            return

        performance_metrics = CGroupsTelemetry.report_all_tracked()
        self.last_cgroup_report_telemetry = time_now

        if not performance_metrics:
            return

        message = generate_extension_metrics_telemetry_dictionary(
            schema_version=1.0,
            performance_metrics=performance_metrics)
        add_event(
            name=AGENT_NAME,
            version=CURRENT_VERSION,
            op=WALAEventOperation.ExtensionMetricsData,
            is_success=True,
            message=ustr(message),
            log_event=False)
    except Exception as e:
        logger.warn("Could not report all the tracked telemetry due to {0}", ustr(e))
def test_telemetry_polling_with_inactive_cgroups(self, *_):
    """
    Poll freshly tracked extension cgroups once and expect no data to be produced.

    After the poll the dummy paths are expected to be untracked and the poll
    result empty; after report_all_tracked() no metrics entries are expected
    to remain.
    """
    num_extensions = 5
    no_extensions_expected = 0

    self._track_new_extension_cgroups(num_extensions)
    self._assert_cgroups_are_tracked(num_extensions)

    polled_metrics = CGroupsTelemetry.poll_all_tracked()

    for index in range(num_extensions):
        cpu_path = "dummy_cpu_path_{0}".format(index)
        memory_path = "dummy_memory_path_{0}".format(index)
        self.assertFalse(CGroupsTelemetry.is_tracked(cpu_path))
        self.assertFalse(CGroupsTelemetry.is_tracked(memory_path))

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
    self._assert_calculated_resource_metrics_equal([], [], [], [], proc_ids=None)
    self.assertEqual(len(polled_metrics), 0)

    collected_metrics = CGroupsTelemetry.report_all_tracked()
    self._assert_extension_metrics_data(
        collected_metrics, num_extensions, [], [], [], [],
        is_cpu_present=False, is_memory_present=False)

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), no_extensions_expected)
    self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def test_telemetry_calculations(self, *args):
    """
    Feed random CPU and memory samples through repeated polls and compare the
    reported summaries with generate_metric_list().

    Each summary is expected to hold seven values, of which the first five are
    compared against generate_metric_list() applied to the raw samples.
    """
    num_polls = 10
    num_extensions = 1
    num_summarization_values = 7

    cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024 ** 3
    memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]
    max_memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)

        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)

        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    samples = zip(cpu_percent_values, memory_usage_values, max_memory_usage_values)

                    for cpu_percent, memory_usage, max_memory_usage in samples:
                        patch_is_active.return_value = True
                        patch_get_cpu_percent.return_value = cpu_percent
                        patch_get_memory_usage.return_value = memory_usage
                        patch_get_memory_max_usage.return_value = max_memory_usage
                        CGroupsTelemetry.poll_all_tracked()

    collected_metrics = CGroupsTelemetry.report_all_tracked()

    for index in range(num_extensions):
        name = "dummy_extension_{0}".format(index)

        self.assertIn(name, collected_metrics)
        extension_metrics = collected_metrics[name]

        self.assertIn("memory", extension_metrics)
        memory_summary = extension_metrics["memory"]
        self.assertIn("cur_mem", memory_summary)
        self.assertIn("max_mem", memory_summary)
        self.assertEqual(num_summarization_values, len(memory_summary["cur_mem"]))
        self.assertEqual(num_summarization_values, len(memory_summary["max_mem"]))

        self.assertListEqual(generate_metric_list(memory_usage_values), memory_summary["cur_mem"][0:5])
        self.assertListEqual(generate_metric_list(max_memory_usage_values), memory_summary["max_mem"][0:5])

        self.assertIn("cpu", extension_metrics)
        cpu_summary = extension_metrics["cpu"]
        self.assertIn("cur_cpu", cpu_summary)
        self.assertEqual(num_summarization_values, len(cpu_summary["cur_cpu"]))

        self.assertListEqual(generate_metric_list(cpu_percent_values), cpu_summary["cur_cpu"][0:5])
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
    """
    Poll ten times with only the memory readers patched to return values.

    Every poll is expected to return five metrics per extension and to
    accumulate memory data but no CPU data; the report produced afterwards is
    checked with is_cpu_present=False.
    """
    num_extensions = 5
    self._track_new_extension_cgroups(num_extensions)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                current_memory = 209715200  # example 200 MB
                current_max_memory = 471859200  # example 450 MB
                statm_memory_used = TestCGroupsTelemetry.TestProcStatmMemoryUsed

                patch_is_active.return_value = True
                patch_get_memory_usage.return_value = current_memory
                patch_get_memory_max_usage.return_value = current_max_memory

                num_polls = 10
                # Memory is only populated, CPU is not. Thus 5 metrics per cgroup.
                metrics_per_extension = 5

                for polls_done in range(1, num_polls + 1):
                    polled_metrics = CGroupsTelemetry.poll_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                    self._assert_calculated_resource_metrics_equal(
                        cpu_usage=[],
                        memory_usage=[current_memory] * polls_done,
                        max_memory_usage=[current_max_memory] * polls_done,
                        memory_statm_memory_usage=[statm_memory_used] * polls_done,
                        proc_ids=TestCGroupsTelemetry.TestProcessIds)

                    self.assertEqual(len(polled_metrics), num_extensions * metrics_per_extension)
                    self._assert_polled_metrics_equal(
                        polled_metrics, 0, current_memory, current_max_memory, statm_memory_used)

                collected_metrics = CGroupsTelemetry.report_all_tracked()
                self._assert_extension_metrics_data(
                    collected_metrics,
                    num_extensions,
                    [],
                    [statm_memory_used] * num_polls,
                    [current_memory] * num_polls,
                    [current_max_memory] * num_polls,
                    is_cpu_present=False)

                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
    """
    Poll repeatedly with only CpuCgroup.get_cpu_usage patched to return a value.

    Every poll is expected to return one metric per extension and to
    accumulate CPU data only; after report_all_tracked() the calculated
    metrics are compared against empty lists.
    """
    num_extensions = 5
    self._track_new_extension_cgroups(num_extensions)

    with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
        with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
            current_cpu = 30

            patch_is_active.return_value = True
            patch_get_cpu_usage.return_value = current_cpu

            first_poll, poll_limit = 1, 10
            metrics_per_extension = 1  # Only CPU populated

            for polls_done in range(first_poll, poll_limit):
                polled_metrics = CGroupsTelemetry.poll_all_tracked()

                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                self._assert_calculated_resource_metrics_equal(
                    cpu_usage=[current_cpu] * polls_done,
                    memory_usage=[],
                    max_memory_usage=[],
                    proc_ids=[],
                    memory_statm_memory_usage=[])

                self.assertEqual(len(polled_metrics), num_extensions * metrics_per_extension)
                self._assert_polled_metrics_equal(polled_metrics, current_cpu, 0, 0, 0)

            CGroupsTelemetry.report_all_tracked()

            self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
            self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def send_telemetry_metrics(self):
    """
    Collect the tracked cgroup metrics and, when there are any, send them as an
    ExtensionMetricsData event.

    The send_telemetry_metrics would soon be removed in favor of sending performance metrics directly.
    """
    performance_metrics = CGroupsTelemetry.report_all_tracked()

    # Nothing collected, nothing to send.
    if not performance_metrics:
        return

    message = generate_extension_metrics_telemetry_dictionary(
        schema_version=1.0,
        performance_metrics=performance_metrics)
    add_event(
        name=AGENT_NAME,
        version=CURRENT_VERSION,
        op=WALAEventOperation.ExtensionMetricsData,
        is_success=True,
        message=ustr(message),
        log_event=False)
def test_telemetry_calculations(self, patch_get_statm, patch_is_active, patch_get_cpu_usage,
                                patch_get_memory_usage, patch_get_memory_max_usage, *args):
    """
    Feed random CPU, memory and statm samples through ten polls.

    Every poll is expected to return six metrics per extension matching the
    values fed in; the report produced afterwards is checked against the full
    sample lists.
    """
    num_polls = 10
    num_extensions = 1

    cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024 ** 3
    memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]
    max_memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]
    proc_stat_memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]

    self._track_new_extension_cgroups(num_extensions)
    self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

    samples = zip(cpu_percent_values, memory_usage_values, max_memory_usage_values,
                  proc_stat_memory_usage_values)

    for cpu_percent, memory_usage, max_memory_usage, statm_memory_usage in samples:
        patch_is_active.return_value = True
        patch_get_cpu_usage.return_value = cpu_percent
        patch_get_memory_usage.return_value = memory_usage
        patch_get_memory_max_usage.return_value = max_memory_usage
        patch_get_statm.return_value = statm_memory_usage

        polled_metrics = CGroupsTelemetry.poll_all_tracked()

        # 1 CPU metric + 1 Current Memory + 1 Max memory + num_processes (3) * memory from statm
        self.assertEqual(len(polled_metrics), 6 * num_extensions)
        self._assert_polled_metrics_equal(
            polled_metrics, cpu_percent, memory_usage, max_memory_usage, statm_memory_usage)

    collected_metrics = CGroupsTelemetry.report_all_tracked()
    self._assert_extension_metrics_data(
        collected_metrics,
        num_extensions,
        cpu_percent_values,
        proc_stat_memory_usage_values,
        memory_usage_values,
        max_memory_usage_values)
def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args):
    """
    With CGroup.is_active returning False and _process_cgroup_metric returning
    an empty dict, every poll and the final report are expected to be empty.
    """
    num_extensions = 5
    self._track_new_extension_cgroups(num_extensions)

    with patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric") as \
            patch_process_cgroup_metric:
        with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
            patch_is_active.return_value = False
            patch_process_cgroup_metric.return_value = {}

            first_poll, poll_limit = 1, 10

            for _ in range(first_poll, poll_limit):
                polled_metrics = CGroupsTelemetry.poll_all_tracked()
                self.assertEqual(0, len(polled_metrics))

            collected_metrics = CGroupsTelemetry.report_all_tracked()
            self.assertEqual(0, len(collected_metrics))
def send_telemetry_metrics(self):
    """
    Send the tracked cgroup metrics as an ExtensionMetricsData event.

    Metrics are collected only when at least
    MonitorHandler.CGROUP_TELEMETRY_REPORTING_PERIOD has elapsed since
    ``self.last_cgroup_report_telemetry``, and an event is added only when the
    collected metrics are non-empty.
    """
    time_now = datetime.datetime.utcnow()

    # The first call only starts the reporting clock.
    if not self.last_cgroup_report_telemetry:
        self.last_cgroup_report_telemetry = time_now

    next_report_time = self.last_cgroup_report_telemetry + MonitorHandler.CGROUP_TELEMETRY_REPORTING_PERIOD
    if time_now < next_report_time:
        return

    performance_metrics = CGroupsTelemetry.report_all_tracked()
    self.last_cgroup_report_telemetry = time_now

    if not performance_metrics:
        return

    message = generate_extension_metrics_telemetry_dictionary(
        schema_version=1.0,
        performance_metrics=performance_metrics)
    add_event(
        name=AGENT_NAME,
        version=CURRENT_VERSION,
        op=WALAEventOperation.ExtensionMetricsData,
        is_success=True,
        message=ustr(message),
        log_event=False)
def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args):
    """
    With CGroup.is_active returning False and _process_cgroup_metric returning
    an empty dict, the report produced after several polls is expected to be empty.
    """
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)

        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)

        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric") as \
            patch_process_cgroup_metric:
        with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
            patch_is_active.return_value = False
            patch_process_cgroup_metric.return_value = {}

            first_poll, poll_limit = 1, 10

            for _ in range(first_poll, poll_limit):
                CGroupsTelemetry.poll_all_tracked()

            collected_metrics = CGroupsTelemetry.report_all_tracked()
            self.assertEqual(0, len(collected_metrics))
def test_generate_extension_metrics_telemetry_dictionary(self, *args):
    """
    Build the telemetry dictionary from polled metrics and check its layout.

    With schema version 1.0 and real metrics the result is expected to contain
    "SchemaVersion" and "PerfMetrics", and each summary is expected to hold
    seven values with strings at positions 5 and 6. With schema version 1.0
    and no metrics only "SchemaVersion" is expected; other schema versions are
    expected to yield None.
    """
    num_polls = 10
    num_extensions = 1
    num_summarization_values = 7

    cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024 ** 3
    memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]
    max_memory_usage_values = [random.randint(0, memory_upper_bound) for _ in range(num_polls)]

    # no need to initialize the CPU usage, since we mock get_cpu_usage() below
    with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
        for index in range(num_extensions):
            extension_name = "dummy_extension_{0}".format(index)

            cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
            CGroupsTelemetry.track_cgroup(cpu_cgroup)

            memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
            CGroupsTelemetry.track_cgroup(memory_cgroup)

    self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    samples = zip(cpu_percent_values, memory_usage_values, max_memory_usage_values)

                    for cpu_percent, memory_usage, max_memory_usage in samples:
                        patch_is_active.return_value = True
                        patch_get_cpu_usage.return_value = cpu_percent
                        patch_get_memory_usage.return_value = memory_usage
                        patch_get_memory_max_usage.return_value = max_memory_usage
                        CGroupsTelemetry.poll_all_tracked()

    performance_metrics = CGroupsTelemetry.report_all_tracked()

    message_json = generate_extension_metrics_telemetry_dictionary(
        schema_version=1.0,
        performance_metrics=performance_metrics)

    for index in range(num_extensions):
        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(index)))
        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(index)))

    self.assertIn("SchemaVersion", message_json)
    self.assertIn("PerfMetrics", message_json)

    collected_metrics = message_json["PerfMetrics"]

    for index in range(num_extensions):
        extn_name = "dummy_extension_{0}".format(index)

        self.assertIn("memory", collected_metrics[extn_name])
        memory_summary = collected_metrics[extn_name]["memory"]
        self.assertIn("cur_mem", memory_summary)
        self.assertIn("max_mem", memory_summary)
        self.assertEqual(len(memory_summary["cur_mem"]), num_summarization_values)
        self.assertEqual(len(memory_summary["max_mem"]), num_summarization_values)

        self.assertIsInstance(memory_summary["cur_mem"][5], str)
        self.assertIsInstance(memory_summary["cur_mem"][6], str)
        self.assertIsInstance(memory_summary["max_mem"][5], str)
        self.assertIsInstance(memory_summary["max_mem"][6], str)

        self.assertIn("cpu", collected_metrics[extn_name])
        cpu_summary = collected_metrics[extn_name]["cpu"]
        self.assertIn("cur_cpu", cpu_summary)
        self.assertEqual(len(cpu_summary["cur_cpu"]), num_summarization_values)

        self.assertIsInstance(cpu_summary["cur_cpu"][5], str)
        self.assertIsInstance(cpu_summary["cur_cpu"][6], str)

    # Known schema version without metrics: the version is present, the metrics are not.
    message_json = generate_extension_metrics_telemetry_dictionary(
        schema_version=1.0,
        performance_metrics=None)
    self.assertIn("SchemaVersion", message_json)
    self.assertNotIn("PerfMetrics", message_json)

    # Any other schema version is expected to produce no dictionary at all.
    message_json = generate_extension_metrics_telemetry_dictionary(
        schema_version=2.0,
        performance_metrics=None)
    self.assertEqual(message_json, None)

    message_json = generate_extension_metrics_telemetry_dictionary(
        schema_version="z",
        performance_metrics=None)
    self.assertEqual(message_json, None)
def test_telemetry_with_tracked_cgroup(self):
    """
    Launch a short script through an extension handler and check the shape of
    the cgroup telemetry collected for it.

    CGroupsApi._is_systemd is patched to return False and the cached
    CGroupConfigurator instance is cleared for the duration of the test; the
    previous instance is put back in the ``finally`` clause.
    """
    self.assertTrue(i_am_root(), "Test does not run when non-root")

    # This test has some timing issues when systemd is managing cgroups, so we force the file system API
    # by creating a new instance of the CGroupConfigurator
    with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False):
        saved_configurator = CGroupConfigurator._instance
        CGroupConfigurator._instance = None

        try:
            extn_name = "foobar-1.0.0"
            max_num_polls = 30
            time_to_wait = 3
            num_summarization_values = 7

            cgs = make_new_cgroup(extn_name)
            self.assertEqual(len(cgs), 2)

            handler_properties = ExtHandlerProperties()
            handler_properties.version = "1.0.0"
            self.ext_handler = ExtHandler(name='foobar')
            self.ext_handler.properties = handler_properties
            self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None)

            script_body = '''
nohup python -c "import time

for i in range(5):
    x = [1, 2, 3, 4, 5] * (i * 1000)
    time.sleep({0})
    x *= 0
    print('Test loop')" &
'''.format(time_to_wait)
            command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", script_body)

            self.log_dir = os.path.join(self.tmp_dir, "log")

            with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir):
                with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir):
                    self.ext_handler_instance.launch_command(command)

                    #
                    # If the test is made to run using the systemd API, then the paths of the cgroups need to be
                    # checked differently:
                    #
                    #   self.assertEquals(len(CGroupsTelemetry._tracked), 2)
                    #   cpu = os.path.join(BASE_CGROUPS, "cpu", "system.slice", r"foobar_1.0.0_.*\.scope")
                    #   self.assertTrue(any(re.match(cpu, tracked.path) for tracked in CGroupsTelemetry._tracked))
                    #   memory = os.path.join(BASE_CGROUPS, "memory", "system.slice", r"foobar_1.0.0_.*\.scope")
                    #   self.assertTrue(any(re.match(memory, tracked.path) for tracked in CGroupsTelemetry._tracked))
                    #
                    cpu_cgroup_path = os.path.join(BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0")
                    memory_cgroup_path = os.path.join(
                        BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0")
                    self.assertTrue(CGroupsTelemetry.is_tracked(cpu_cgroup_path))
                    self.assertTrue(CGroupsTelemetry.is_tracked(memory_cgroup_path))

                    for _ in range(max_num_polls):
                        CGroupsTelemetry.poll_all_tracked()
                        time.sleep(0.5)

                    collected_metrics = CGroupsTelemetry.report_all_tracked()

                    self.assertIn("memory", collected_metrics[extn_name])
                    memory_summary = collected_metrics[extn_name]["memory"]
                    self.assertIn("cur_mem", memory_summary)
                    self.assertIn("max_mem", memory_summary)
                    self.assertEqual(len(memory_summary["cur_mem"]), num_summarization_values)
                    self.assertEqual(len(memory_summary["max_mem"]), num_summarization_values)

                    self.assertIsInstance(memory_summary["cur_mem"][5], str)
                    self.assertIsInstance(memory_summary["cur_mem"][6], str)
                    self.assertIsInstance(memory_summary["max_mem"][5], str)
                    self.assertIsInstance(memory_summary["max_mem"][6], str)

                    self.assertIn("cpu", collected_metrics[extn_name])
                    cpu_summary = collected_metrics[extn_name]["cpu"]
                    self.assertIn("cur_cpu", cpu_summary)
                    self.assertEqual(len(cpu_summary["cur_cpu"]), num_summarization_values)

                    self.assertIsInstance(cpu_summary["cur_cpu"][5], str)
                    self.assertIsInstance(cpu_summary["cur_cpu"][6], str)

                    for position in range(5):
                        self.assertGreater(memory_summary["cur_mem"][position], 0)
                        self.assertGreater(memory_summary["max_mem"][position], 0)
                        # Equal because CPU could be zero for minimum value.
                        self.assertGreaterEqual(cpu_summary["cur_cpu"][position], 0)
        finally:
            CGroupConfigurator._instance = saved_configurator
def test_telemetry_with_tracked_cgroup(self, *_):
    """Launch an extension command and check the telemetry gathered for its cgroups.

    Extra positional arguments are accepted and ignored (presumably mocks
    injected by ``patch`` decorators -- confirm at the definition site).

    The test asserts ``i_am_root()`` up front, so it fails (it is not skipped)
    when that returns a false value. It then launches a background script via
    ``ExtHandlerInstance.launch_command``, asserts that the extension's "cpu"
    and "memory" cgroup paths under ``walinuxagent.extensions`` are tracked,
    polls the tracked cgroups repeatedly and validates the layout of the
    dictionary returned by ``CGroupsTelemetry.report_all_tracked()``: each
    metric list has ``num_summarization_values`` entries, entries 0-4 are
    compared numerically and entries 5-6 are strings.

    ``CGroupConfigurator._instance`` is cleared for the duration of the test
    and restored afterwards, so the reset does not leak into other tests.
    """
    self.assertTrue(i_am_root(), "Test does not run when non-root")

    # Remember the current singleton before clearing it; it is restored in the `finally` below.
    cgroup_configurator_instance = CGroupConfigurator._instance
    CGroupConfigurator._instance = None

    try:
        max_num_polls = 30
        time_to_wait = 3
        extn_name = "foobar-1.0.0"
        num_summarization_values = 7

        cgs = make_new_cgroup(extn_name)
        self.assertEqual(len(cgs), 2)

        ext_handler_properties = ExtHandlerProperties()
        ext_handler_properties.version = "1.0.0"
        self.ext_handler = ExtHandler(name='foobar')
        self.ext_handler.properties = ext_handler_properties
        self.ext_handler_instance = ExtHandlerInstance(
            ext_handler=self.ext_handler, protocol=None)

        # The payload handed to `python -c` is newline- and indentation-sensitive, so it is
        # assembled from explicit lines instead of relying on the layout of this source file.
        # It runs in the background (nohup ... &): 5 iterations, each allocating a list and
        # sleeping `time_to_wait` seconds.
        script_lines = [
            "",
            'nohup python -c "import time',
            "",
            "for i in range(5):",
            "    x = [1, 2, 3, 4, 5] * (i * 1000)",
            "    time.sleep({0})",
            "    x *= 0",
            "    print('Test loop')\" &",
            "",
        ]
        command = self.create_script(
            "keep_cpu_busy_and_consume_memory_for_5_seconds",
            "\n".join(script_lines).format(time_to_wait))

        self.log_dir = os.path.join(self.tmp_dir, "log")

        # Point the handler's base and log directories at the test's temporary locations
        # while the command is launched.
        with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir):
            with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir):
                self.ext_handler_instance.launch_command(command)

        self.assertTrue(
            CGroupsTelemetry.is_tracked(
                os.path.join(BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0")))
        self.assertTrue(
            CGroupsTelemetry.is_tracked(
                os.path.join(BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0")))

        # 30 polls * 0.5s = 15s, the same as 5 iterations * time_to_wait in the launched script.
        for _ in range(max_num_polls):
            CGroupsTelemetry.poll_all_tracked()
            time.sleep(0.5)

        collected_metrics = CGroupsTelemetry.report_all_tracked()
        extension_metrics = collected_metrics[extn_name]

        self.assertIn("memory", extension_metrics)
        self.assertIn("cur_mem", extension_metrics["memory"])
        self.assertIn("max_mem", extension_metrics["memory"])
        cur_mem = extension_metrics["memory"]["cur_mem"]
        max_mem = extension_metrics["memory"]["max_mem"]
        self.assertEqual(len(cur_mem), num_summarization_values)
        self.assertEqual(len(max_mem), num_summarization_values)

        # The last two entries of each metric list are expected to be strings.
        self.assertIsInstance(cur_mem[5], str)
        self.assertIsInstance(cur_mem[6], str)
        self.assertIsInstance(max_mem[5], str)
        self.assertIsInstance(max_mem[6], str)

        self.assertIn("cpu", extension_metrics)
        self.assertIn("cur_cpu", extension_metrics["cpu"])
        cur_cpu = extension_metrics["cpu"]["cur_cpu"]
        self.assertEqual(len(cur_cpu), num_summarization_values)

        self.assertIsInstance(cur_cpu[5], str)
        self.assertIsInstance(cur_cpu[6], str)

        # The first five entries are the numeric values; CPU may legitimately be 0,
        # hence the greater-or-equal check for it.
        for index in range(5):
            self.assertGreater(cur_mem[index], 0)
            self.assertGreater(max_mem[index], 0)
            self.assertGreaterEqual(cur_cpu[index], 0)
    finally:
        # Always put the original configurator back so other tests are unaffected.
        CGroupConfigurator._instance = cgroup_configurator_instance
def test_telemetry_polling_with_active_cgroups(self, *args):
    """Poll active (mocked) cgroups several times and verify the accumulated metrics.

    Three extensions are tracked via ``_track_new_extension_cgroups``. CPU usage,
    memory usage, max memory usage and ``CGroup.is_active`` are replaced by mocks
    returning fixed values. After every poll the test checks both the metrics
    accumulated so far and the list returned by ``poll_all_tracked()``. After the
    last poll it checks the data returned by ``report_all_tracked()`` and that
    the accumulated per-extension values are empty again.
    """
    extension_count = 3
    self._track_new_extension_cgroups(extension_count)

    cpu_value = 30
    memory_value = 209715200  # example 200 MB
    max_memory_value = 471859200  # example 450 MB
    statm_value = TestCGroupsTelemetry.TestProcStatmMemoryUsed

    # 1 CPU metric + 1 Current Memory + 1 Max memory + num_processes * memory from statm
    metrics_per_extension = 1 + 1 + 1 + 3 * 1
    poll_total = 10

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage",
               return_value=max_memory_value):
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage",
                   return_value=memory_value):
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage",
                       return_value=cpu_value):
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active",
                           return_value=True):
                    for polls_so_far in range(1, poll_total + 1):
                        polled = CGroupsTelemetry.poll_all_tracked()

                        # Every poll adds one more sample per resource for each extension.
                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                        self._assert_calculated_resource_metrics_equal(
                            cpu_usage=[cpu_value] * polls_so_far,
                            memory_usage=[memory_value] * polls_so_far,
                            max_memory_usage=[max_memory_value] * polls_so_far,
                            proc_ids=TestCGroupsTelemetry.TestProcessIds,
                            memory_statm_memory_usage=[statm_value] * polls_so_far)

                        self.assertEqual(len(polled), extension_count * metrics_per_extension)
                        self._assert_polled_metrics_equal(
                            polled, cpu_value, memory_value, max_memory_value, statm_value)

                    reported = CGroupsTelemetry.report_all_tracked()

                    self._assert_extension_metrics_data(
                        reported,
                        extension_count,
                        [cpu_value] * poll_total,
                        [statm_value] * poll_total,
                        [memory_value] * poll_total,
                        [max_memory_value] * poll_total,
                        is_cpu_present=False)

                    # After reporting, the extensions are still present but their
                    # accumulated values are expected to be empty.
                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                    self._assert_calculated_resource_metrics_equal([], [], [], [], [])