def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
    # The memory reader raises on every poll, so only CPU samples are expected to accumulate.
    extension_total = 5
    cpu_reading = 30
    first_poll = 1

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_percent_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_usage_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                is_active_mock.return_value = True
                memory_usage_mock.side_effect = Exception("File not found")
                cpu_percent_mock.return_value = cpu_reading

                for polls_so_far in range(first_poll, 10):
                    CGroupsTelemetry.poll_all_tracked()
                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
                    # One CPU sample per completed poll; the memory series stay empty.
                    self._assert_cgroup_metrics_equal(
                        cpu_usage=[cpu_reading] * polls_so_far,
                        memory_usage=[],
                        max_memory_usage=[])

                CGroupsTelemetry.report_all_tracked()

                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
                self._assert_cgroup_metrics_equal([], [], [])
def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied(
        self, mock_read_file, patch_periodic_warn, *args):
    # read_file is made to raise an IOError with errno EPERM; periodic warnings are expected.
    extension_total = 1
    controller_total = 2
    is_active_checks = 2

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    self.assertEqual(0, patch_periodic_warn.call_count)

    # Expecting logs to be present for different kind of errors
    permission_denied = IOError()
    permission_denied.errno = errno.EPERM
    mock_read_file.side_effect = permission_denied

    poll_count = 1
    # each collect per controller would generate a log statement, and each cgroup would invoke a
    # is active check raising an exception
    warnings_per_poll = controller_total + is_active_checks

    for _ in range(poll_count, 10):
        CGroupsTelemetry.poll_all_tracked()
        self.assertEqual(poll_count * warnings_per_poll, patch_periodic_warn.call_count)
def test_generate_extension_metrics_telemetry_dictionary(self, *args):  # pylint: disable=unused-argument
    # Tracks one CPU and one memory cgroup per extension, then polls once per random sample.
    poll_total = 10
    extension_total = 1

    cpu_samples = [random.randint(0, 100) for _ in range(poll_total)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024 ** 3
    memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]
    max_memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]

    # no need to initialize the CPU usage, since we mock get_cpu_usage() below
    with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
        for ext_index in range(extension_total):
            extension = "dummy_extension_{0}".format(ext_index)
            cpu_cgroup = CpuCgroup(extension, "dummy_cpu_path_{0}".format(ext_index))
            CGroupsTelemetry.track_cgroup(cpu_cgroup)
            memory_cgroup = MemoryCgroup(extension, "dummy_memory_path_{0}".format(ext_index))
            CGroupsTelemetry.track_cgroup(memory_cgroup)

    self.assertEqual(2 * extension_total, len(CGroupsTelemetry._tracked))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    for poll_index in range(poll_total):
                        is_active_mock.return_value = True
                        cpu_mock.return_value = cpu_samples[poll_index]
                        memory_mock.return_value = memory_samples[poll_index]
                        max_memory_mock.return_value = max_memory_samples[poll_index]
                        CGroupsTelemetry.poll_all_tracked()
def poll_telemetry_metrics(self):
    """
    Poll all tracked cgroups once the cgroup telemetry polling period has elapsed.

    The timestamp of the last poll is kept in self.last_cgroup_polling_telemetry; when it is
    unset, it is initialized to the current UTC time before the period check is made.
    """
    now = datetime.datetime.utcnow()

    if not self.last_cgroup_polling_telemetry:
        self.last_cgroup_polling_telemetry = now

    next_poll_due = self.last_cgroup_polling_telemetry + MonitorHandler.CGROUP_TELEMETRY_POLLING_PERIOD
    if now < next_poll_due:
        # Polling period not elapsed yet; nothing to do.
        return

    CGroupsTelemetry.poll_all_tracked()
    self.last_cgroup_polling_telemetry = now
def test_telemetry_polling_with_changing_cgroups_state(self, *args):
    # Polls once while the cgroups report active and once after they report inactive.
    extension_total = 5
    remaining_after_report = 0
    samples_expected = 2
    cpu_reading = 30
    memory_reading = 209715200  # example 200 MB
    max_memory_reading = 471859200  # example 450 MB

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_percent_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = True
                    cpu_percent_mock.return_value = cpu_reading
                    memory_mock.return_value = memory_reading
                    max_memory_mock.return_value = max_memory_reading

                    for ext_index in range(extension_total):
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(ext_index)))
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(ext_index)))

                    CGroupsTelemetry.poll_all_tracked()

                    for ext_index in range(extension_total):
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(ext_index)))
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(ext_index)))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)

                    # From here on the cgroups report themselves as inactive.
                    is_active_mock.return_value = False

                    CGroupsTelemetry.poll_all_tracked()

                    for ext_index in range(extension_total):
                        self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(ext_index)))
                        self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(ext_index)))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
                    self._assert_cgroup_metrics_equal(
                        cpu_usage=[cpu_reading] * samples_expected,
                        memory_usage=[memory_reading] * samples_expected,
                        max_memory_usage=[max_memory_reading] * samples_expected)

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), remaining_after_report)
                    self._assert_cgroup_metrics_equal([], [], [])
def test_telemetry_polling_with_inactive_cgroups(self, *args):
    # Cgroups report inactive from the start; a single poll is expected to untrack them
    # while leaving one sample per metric.
    extension_total = 5
    remaining_after_report = 0
    samples_expected = 1
    cpu_reading = 30
    memory_reading = 209715200  # example 200 MB
    max_memory_reading = 471859200  # example 450 MB

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_percent_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = False
                    cpu_percent_mock.return_value = cpu_reading
                    memory_mock.return_value = memory_reading
                    max_memory_mock.return_value = max_memory_reading

                    for ext_index in range(extension_total):
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(ext_index)))
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(ext_index)))

                    CGroupsTelemetry.poll_all_tracked()

                    for ext_index in range(extension_total):
                        self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(ext_index)))
                        self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(ext_index)))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)

                    for metric_set in CGroupsTelemetry._cgroup_metrics.values():
                        memory_series, max_memory_series, cpu_series = metric_set.get_metrics()

                        self.assertEqual(len(memory_series._data), samples_expected)
                        self.assertListEqual(memory_series._data, [memory_reading] * samples_expected)

                        self.assertEqual(len(max_memory_series._data), samples_expected)
                        self.assertListEqual(max_memory_series._data, [max_memory_reading] * samples_expected)

                        self.assertEqual(len(cpu_series._data), samples_expected)
                        self.assertListEqual(cpu_series._data, [cpu_reading] * samples_expected)

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), remaining_after_report)

                    for metric_set in CGroupsTelemetry._cgroup_metrics.values():
                        memory_series, max_memory_series, cpu_series = metric_set.get_metrics()
                        self.assertEqual(len(memory_series._data), 0)
                        self.assertEqual(len(max_memory_series._data), 0)
                        self.assertEqual(len(cpu_series._data), 0)
def test_telemetry_polling_with_changing_cgroups_state(
        self, patch_get_statm, patch_is_active, patch_get_cpu_usage, patch_get_mem, patch_get_max_mem, *args):
    # First poll runs with active cgroups and valid readings; the second runs after the
    # cgroups report inactive and every reader raises.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    remaining_after_report = 0
    samples_expected = 1
    cpu_reading = 30
    memory_reading = 209715200  # example 200 MB
    max_memory_reading = 471859200  # example 450 MB
    statm_reading = 20000000

    patch_is_active.return_value = True
    patch_get_cpu_usage.return_value = cpu_reading
    patch_get_mem.return_value = memory_reading
    patch_get_max_mem.return_value = max_memory_reading
    patch_get_statm.return_value = statm_reading

    self._assert_cgroups_are_tracked(extension_total)
    CGroupsTelemetry.poll_all_tracked()
    self._assert_cgroups_are_tracked(extension_total)

    patch_is_active.return_value = False
    patch_get_cpu_usage.side_effect = raise_ioerror
    patch_get_mem.side_effect = raise_ioerror
    patch_get_max_mem.side_effect = raise_ioerror
    patch_get_statm.side_effect = raise_ioerror

    CGroupsTelemetry.poll_all_tracked()

    for ext_index in range(extension_total):
        cpu_path = "dummy_cpu_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(cpu_path))
        memory_path = "dummy_memory_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(memory_path))

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
    self._assert_calculated_resource_metrics_equal(
        cpu_usage=[cpu_reading] * samples_expected,
        memory_usage=[memory_reading] * samples_expected,
        max_memory_usage=[max_memory_reading] * samples_expected,
        proc_ids=TestCGroupsTelemetry.TestProcessIds,
        memory_statm_memory_usage=[statm_reading] * samples_expected)

    CGroupsTelemetry.report_all_tracked()

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), remaining_after_report)
    self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def test_telemetry_polling_to_generate_transient_logs_index_error(self):
    # read_file is patched to return an empty string for every call.
    extension_total = 1
    self._track_new_extension_cgroups(extension_total)

    # Generating a different kind of error (non-IOError) to check the logging.
    # Trying to invoke IndexError during the getParameter call
    with patch("azurelinuxagent.common.utils.fileutil.read_file", return_value=''):
        with patch("azurelinuxagent.common.logger.periodic_warn") as warn_mock:
            # 1 periodic warning for the cpu cgroups, and 1 for memory
            warnings_expected = 2
            for _ in range(1, 10):
                CGroupsTelemetry.poll_all_tracked()
                self.assertEqual(warnings_expected, warn_mock.call_count)
def test_telemetry_calculations(self, *args):
    # Feeds random readings through ten polls and checks the summarized values that are reported.
    poll_total = 10
    extension_total = 1
    summary_length = 7

    cpu_samples = [random.randint(0, 100) for _ in range(poll_total)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024 ** 3
    memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]
    max_memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    self.assertEqual(2 * extension_total, len(CGroupsTelemetry._tracked))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_percent_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    for poll_index in range(poll_total):
                        is_active_mock.return_value = True
                        cpu_percent_mock.return_value = cpu_samples[poll_index]
                        memory_mock.return_value = memory_samples[poll_index]
                        max_memory_mock.return_value = max_memory_samples[poll_index]
                        CGroupsTelemetry.poll_all_tracked()

    reported = CGroupsTelemetry.report_all_tracked()

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)

        self.assertIn(extension, reported)
        self.assertIn("memory", reported[extension])
        self.assertIn("cur_mem", reported[extension]["memory"])
        self.assertIn("max_mem", reported[extension]["memory"])
        self.assertEqual(summary_length, len(reported[extension]["memory"]["cur_mem"]))
        self.assertEqual(summary_length, len(reported[extension]["memory"]["max_mem"]))

        # Only the first five summarized entries are compared against generate_metric_list.
        self.assertListEqual(generate_metric_list(memory_samples),
                             reported[extension]["memory"]["cur_mem"][0:5])
        self.assertListEqual(generate_metric_list(max_memory_samples),
                             reported[extension]["memory"]["max_mem"][0:5])

        self.assertIn("cpu", reported[extension])
        self.assertIn("cur_cpu", reported[extension]["cpu"])
        self.assertEqual(summary_length, len(reported[extension]["cpu"]["cur_cpu"]))
        self.assertListEqual(generate_metric_list(cpu_samples),
                             reported[extension]["cpu"]["cur_cpu"][0:5])
def test_telemetry_polling_to_not_generate_transient_logs_ioerror_file_not_found(self, patch_periodic_warn):
    # read_file raises an IOError with errno ENOENT; no periodic warning is expected.
    extension_total = 1
    self._track_new_extension_cgroups(extension_total)
    self.assertEqual(0, patch_periodic_warn.call_count)

    # Not expecting logs present for io_error with errno=errno.ENOENT
    file_not_found = IOError()
    file_not_found.errno = errno.ENOENT

    with patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=file_not_found):
        first_poll = 1
        for _ in range(first_poll, 10):
            CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(0, patch_periodic_warn.call_count)
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
    # CpuCgroup.collect raises on every poll, so only the memory series are expected to grow.
    extension_total = 5
    memory_reading = 209715200  # example 200 MB
    max_memory_reading = 471859200  # example 450 MB
    first_poll = 1

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.collect") as cpu_collect_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = True
                    cpu_collect_mock.side_effect = Exception("File not found")
                    memory_mock.return_value = memory_reading
                    max_memory_mock.return_value = max_memory_reading

                    for polls_so_far in range(first_poll, 10):
                        CGroupsTelemetry.poll_all_tracked()
                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)

                        for metric_set in CGroupsTelemetry._cgroup_metrics.values():
                            memory_series, max_memory_series, cpu_series = metric_set.get_metrics()

                            self.assertEqual(len(memory_series._data), polls_so_far)
                            self.assertListEqual(memory_series._data, [memory_reading] * polls_so_far)

                            self.assertEqual(len(max_memory_series._data), polls_so_far)
                            self.assertListEqual(max_memory_series._data, [max_memory_reading] * polls_so_far)

                            self.assertEqual(len(cpu_series._data), 0)
                            self.assertListEqual(cpu_series._data, [])

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)

                    for metric_set in CGroupsTelemetry._cgroup_metrics.values():
                        memory_series, max_memory_series, cpu_series = metric_set.get_metrics()
                        self.assertEqual(len(memory_series._data), 0)
                        self.assertEqual(len(max_memory_series._data), 0)
                        self.assertEqual(len(cpu_series._data), 0)
def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage,
                                patch_get_memory_max_usage, *args):  # pylint: disable=unused-argument
    # Each poll is given fresh random readings and the polled metrics are checked against them.
    poll_total = 10
    extension_total = 1

    cpu_samples = [random.randint(0, 100) for _ in range(poll_total)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024**3
    memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]
    max_memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]

    self._track_new_extension_cgroups(extension_total)
    self.assertEqual(2 * extension_total, len(CGroupsTelemetry._tracked))  # pylint: disable=protected-access

    for poll_index in range(poll_total):
        cpu_value = cpu_samples[poll_index]
        memory_value = memory_samples[poll_index]
        max_memory_value = max_memory_samples[poll_index]

        patch_is_active.return_value = True
        patch_get_cpu_usage.return_value = cpu_value
        patch_get_memory_usage.return_value = memory_value
        patch_get_memory_max_usage.return_value = max_memory_value

        polled = CGroupsTelemetry.poll_all_tracked()

        # 1 CPU metric + 1 Current Memory + 1 Max memory
        self.assertEqual(len(polled), 3 * extension_total)
        self._assert_polled_metrics_equal(polled, cpu_value, memory_value, max_memory_value)
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):  # pylint: disable=unused-argument
    # Only the memory readers and is_active are patched here; the CPU reader is left untouched.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                is_active_mock.return_value = True

                memory_reading = 209715200  # example 200 MB
                max_memory_reading = 471859200  # example 450 MB
                memory_mock.return_value = memory_reading
                max_memory_mock.return_value = max_memory_reading

                poll_total = 10
                for _ in range(poll_total):
                    polled = CGroupsTelemetry.poll_all_tracked()

                    # Memory is only populated, CPU is not. Thus 2 metrics per cgroup.
                    self.assertEqual(len(polled), extension_total * 2)
                    self._assert_polled_metrics_equal(polled, 0, memory_reading, max_memory_reading)
def test_telemetry_polling_with_inactive_cgroups(self, *_):
    # A single poll is expected to untrack every cgroup and yield no metrics.
    extension_total = 5
    remaining_after_report = 0

    self._track_new_extension_cgroups(extension_total)
    self._assert_cgroups_are_tracked(extension_total)

    polled = CGroupsTelemetry.poll_all_tracked()

    for ext_index in range(extension_total):
        cpu_path = "dummy_cpu_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(cpu_path))
        memory_path = "dummy_memory_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(memory_path))

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
    self._assert_calculated_resource_metrics_equal([], [], [], [], proc_ids=None)
    self.assertEqual(len(polled), 0)

    reported = CGroupsTelemetry.report_all_tracked()
    self._assert_extension_metrics_data(
        reported, extension_total, [], [], [], [],
        is_cpu_present=False, is_memory_present=False)

    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), remaining_after_report)
    self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def _operation(self):
    """
    Poll all tracked cgroups, report every resulting metric, then hand the same
    metrics to the cgroup configurator's check_cgroups().
    """
    polled = CGroupsTelemetry.poll_all_tracked()

    for sample in polled:
        report_metric(
            sample.category,
            sample.counter,
            sample.instance,
            sample.value,
            log_event=self.__log_metrics)

    configurator = CGroupConfigurator.get_instance()
    configurator.check_cgroups(polled)
def test_telemetry_polling_with_active_cgroups(self, *args):
    # With every reader patched to a fixed value, each poll is expected to yield
    # three metrics per extension.
    extension_total = 3
    self._track_new_extension_cgroups(extension_total)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = True

                    cpu_reading = 30
                    memory_reading = 209715200  # example 200 MB
                    max_memory_reading = 471859200  # example 450 MB

                    # 1 CPU metric + 1 Current Memory + 1 Max memory
                    metrics_per_extension = 3

                    cpu_mock.return_value = cpu_reading
                    memory_mock.return_value = memory_reading
                    max_memory_mock.return_value = max_memory_reading

                    poll_total = 10
                    for _ in range(poll_total):
                        polled = CGroupsTelemetry.poll_all_tracked()
                        self.assertEqual(len(polled), extension_total * metrics_per_extension)
                        self._assert_polled_metrics_equal(
                            polled, cpu_reading, memory_reading, max_memory_reading)
def test_telemetry_polling_with_changing_cgroups_state(self, patch_get_statm, patch_is_active, patch_get_cpu_usage,  # pylint: disable=unused-argument,too-many-arguments
                                                       patch_get_mem, patch_get_max_mem, *args):
    # First poll runs with active cgroups; the second runs after they report inactive
    # and every reader raises, after which no cgroup should remain tracked.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    patch_is_active.return_value = True

    remaining_after_report = 0  # pylint: disable=unused-variable
    samples_expected = 1  # pylint: disable=unused-variable
    cpu_reading = 30
    memory_reading = 209715200  # example 200 MB
    max_memory_reading = 471859200  # example 450 MB
    statm_reading = 20000000

    patch_get_cpu_usage.return_value = cpu_reading
    patch_get_mem.return_value = memory_reading
    patch_get_max_mem.return_value = max_memory_reading
    patch_get_statm.return_value = statm_reading

    self._assert_cgroups_are_tracked(extension_total)
    CGroupsTelemetry.poll_all_tracked()
    self._assert_cgroups_are_tracked(extension_total)

    patch_is_active.return_value = False
    patch_get_cpu_usage.side_effect = raise_ioerror
    patch_get_mem.side_effect = raise_ioerror
    patch_get_max_mem.side_effect = raise_ioerror
    patch_get_statm.side_effect = raise_ioerror

    CGroupsTelemetry.poll_all_tracked()

    for ext_index in range(extension_total):
        cpu_path = "dummy_cpu_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(cpu_path))
        memory_path = "dummy_memory_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(memory_path))
def test_telemetry_polling_to_generate_transient_logs_index_error(self, mock_read_file, *args):
    # read_file is made to return an empty string for every call.
    extension_total = 1

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    # Generating a different kind of error (non-IOError) to check the logging.
    # Trying to invoke IndexError during the getParameter call
    mock_read_file.return_value = ''

    with patch("azurelinuxagent.common.logger.periodic_warn") as warn_mock:
        # called only once at start, and then gets removed from the tracked data.
        warnings_expected = 1
        for _ in range(1, 10):
            CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(warnings_expected, warn_mock.call_count)
def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args):
    # With is_active patched to False, every poll is expected to return no metrics.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
        is_active_mock.return_value = False

        first_poll = 1
        for _ in range(first_poll, 10):
            polled = CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(0, len(polled))
def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied(self, patch_periodic_warn):
    # read_file raises an IOError with errno EPERM; periodic warnings are expected.
    extension_total = 1
    controller_total = 2
    is_active_checks = 2

    self._track_new_extension_cgroups(extension_total)
    self.assertEqual(0, patch_periodic_warn.call_count)

    # Expecting logs to be present for different kind of errors
    permission_denied = IOError()
    permission_denied.errno = errno.EPERM

    with patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=permission_denied):
        poll_count = 1
        # each collect per controller would generate a log statement, and each cgroup would invoke a
        # is active check raising an exception
        warnings_per_poll = controller_total + is_active_checks

        for _ in range(poll_count, 10):
            CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(poll_count * warnings_per_poll, patch_periodic_warn.call_count)
def poll_telemetry_metrics(self):
    """
    Poll every tracked cgroup and forward each resulting metric to report_metric().

    Nothing is reported when the poll yields no metrics. This method does not return a value.
    """
    polled = CGroupsTelemetry.poll_all_tracked()

    # An empty (or otherwise falsy) poll result means there is nothing to report.
    for sample in polled or ():
        report_metric(sample.category, sample.counter, sample.instance, sample.value)
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
    # Only the memory readers and is_active are patched; the CPU series is expected to stay empty.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                is_active_mock.return_value = True

                memory_reading = 209715200  # example 200 MB
                max_memory_reading = 471859200  # example 450 MB
                statm_reading = TestCGroupsTelemetry.TestProcStatmMemoryUsed
                memory_mock.return_value = memory_reading
                max_memory_mock.return_value = max_memory_reading

                poll_total = 10
                for polls_so_far in range(1, poll_total + 1):
                    polled = CGroupsTelemetry.poll_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
                    self._assert_calculated_resource_metrics_equal(
                        cpu_usage=[],
                        memory_usage=[memory_reading] * polls_so_far,
                        max_memory_usage=[max_memory_reading] * polls_so_far,
                        memory_statm_memory_usage=[statm_reading] * polls_so_far,
                        proc_ids=TestCGroupsTelemetry.TestProcessIds)

                    # Memory is only populated, CPU is not. Thus 5 metrics per cgroup.
                    self.assertEqual(len(polled), extension_total * 5)
                    self._assert_polled_metrics_equal(
                        polled, 0, memory_reading, max_memory_reading,
                        TestCGroupsTelemetry.TestProcStatmMemoryUsed)

                reported = CGroupsTelemetry.report_all_tracked()
                self._assert_extension_metrics_data(
                    reported,
                    extension_total,
                    [],
                    [TestCGroupsTelemetry.TestProcStatmMemoryUsed] * poll_total,
                    [memory_reading] * poll_total,
                    [max_memory_reading] * poll_total,
                    is_cpu_present=False)

                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)
                self._assert_calculated_resource_metrics_equal([], [], [], [], [])
def test_telemetry_polling_to_not_generate_transient_logs_ioerror_file_not_found(
        self, mock_read_file, patch_periodic_warn, *args):
    # read_file raises an IOError with errno ENOENT; no periodic warning is expected.
    extension_total = 1

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    self.assertEqual(0, patch_periodic_warn.call_count)

    # Not expecting logs present for io_error with errno=errno.ENOENT
    file_not_found = IOError()
    file_not_found.errno = errno.ENOENT
    mock_read_file.side_effect = file_not_found

    first_poll = 1
    for _ in range(first_poll, 10):
        CGroupsTelemetry.poll_all_tracked()
        self.assertEqual(0, patch_periodic_warn.call_count)
def test_telemetry_polling_with_inactive_cgroups(self, *_):
    # A single poll is expected to untrack every cgroup and return no metrics.
    extension_total = 5
    remaining_after_report = 0

    self._track_new_extension_cgroups(extension_total)
    self._assert_cgroups_are_tracked(extension_total)

    polled = CGroupsTelemetry.poll_all_tracked()

    for ext_index in range(extension_total):
        cpu_path = "dummy_cpu_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(cpu_path))
        memory_path = "dummy_memory_path_{0}".format(ext_index)
        self.assertFalse(CGroupsTelemetry.is_tracked(memory_path))

    self.assertEqual(len(polled), 0)
def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args):
    # _process_cgroup_metric is patched to return an empty dict, so the report is expected to be empty.
    extension_total = 5

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    process_metric_target = "azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric"
    with patch(process_metric_target) as process_metric_mock:
        with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
            is_active_mock.return_value = False
            process_metric_mock.return_value = {}

            first_poll = 1
            for _ in range(first_poll, 10):
                CGroupsTelemetry.poll_all_tracked()

            reported = CGroupsTelemetry.report_all_tracked()
            self.assertEqual(0, len(reported))
def test_process_cgroup_metric_with_incorrect_cgroups_mounted(self, *args):
    # Both the CPU and the memory reader raise; processing each metric set is expected to give {}.
    extension_total = 5

    for ext_index in range(extension_total):
        extension = "dummy_extension_{0}".format(ext_index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(ext_index), "cpu", extension)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(ext_index), "memory", extension)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            cpu_mock.side_effect = Exception("File not found")
            memory_mock.side_effect = Exception("File not found")

            for _ in range(1, 10):
                CGroupsTelemetry.poll_all_tracked()
                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)

            processed_by_name = {}
            for name, metric_set in CGroupsTelemetry._cgroup_metrics.items():
                processed_by_name[name] = CGroupsTelemetry._process_cgroup_metric(metric_set)
                self.assertEqual(processed_by_name[name], {})  # empty
def test_process_cgroup_metric_with_incorrect_cgroups_mounted(self, *args):
    # No reader is patched here; every poll is expected to return no metrics and
    # processing each metric set is expected to give an empty dict.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    for _ in range(1, 10):
        polled = CGroupsTelemetry.poll_all_tracked()
        self.assertEqual(len(polled), 0)
        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_total)

    processed_by_name = {}
    for name, metric_set in CGroupsTelemetry._cgroup_metrics.items():
        processed_by_name[name] = CGroupsTelemetry._process_cgroup_metric(metric_set)
        self.assertEqual(processed_by_name[name], {})  # empty
def test_telemetry_calculations(self, patch_get_statm, patch_is_active, patch_get_cpu_usage,
                                patch_get_memory_usage, patch_get_memory_max_usage, *args):
    # Each poll is given fresh random readings; polled metrics and the final report are checked against them.
    poll_total = 10
    extension_total = 1

    cpu_samples = [random.randint(0, 100) for _ in range(poll_total)]

    # only verifying calculations and not validity of the values.
    memory_upper_bound = 8 * 1024**3
    memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]
    max_memory_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]
    statm_samples = [random.randint(0, memory_upper_bound) for _ in range(poll_total)]

    self._track_new_extension_cgroups(extension_total)
    self.assertEqual(2 * extension_total, len(CGroupsTelemetry._tracked))

    for poll_index in range(poll_total):
        cpu_value = cpu_samples[poll_index]
        memory_value = memory_samples[poll_index]
        max_memory_value = max_memory_samples[poll_index]
        statm_value = statm_samples[poll_index]

        patch_is_active.return_value = True
        patch_get_cpu_usage.return_value = cpu_value
        patch_get_memory_usage.return_value = memory_value
        patch_get_memory_max_usage.return_value = max_memory_value
        patch_get_statm.return_value = statm_value

        polled = CGroupsTelemetry.poll_all_tracked()

        # 1 CPU metric + 1 Current Memory + 1 Max memory + num_processes (3) * memory from statm
        self.assertEqual(len(polled), 6 * extension_total)
        self._assert_polled_metrics_equal(polled, cpu_value, memory_value, max_memory_value, statm_value)

    reported = CGroupsTelemetry.report_all_tracked()
    self._assert_extension_metrics_data(
        reported, extension_total, cpu_samples, statm_samples, memory_samples, max_memory_samples)
def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
    # Only the CPU reader and is_active are patched; each poll is expected to yield
    # one metric per extension.
    extension_total = 5
    self._track_new_extension_cgroups(extension_total)

    with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_mock:
        with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
            is_active_mock.return_value = True

            cpu_reading = 30
            cpu_mock.return_value = cpu_reading

            first_poll = 1
            for _ in range(first_poll, 10):
                polled = CGroupsTelemetry.poll_all_tracked()

                self.assertEqual(len(polled), extension_total * 1)  # Only CPU populated
                self._assert_polled_metrics_equal(polled, cpu_reading, 0, 0)
def _operation_impl(self):
    """
    Check the agent's cgroup for unexpected processes, then poll and report cgroup metrics.

    The result of the process check is compared with self._last_error; a new, different
    error is logged and sent as a CGroupsDebug event, up to 5 times in total
    (tracked in self._error_count). Any exception raised by the process check itself is
    captured and reported the same way instead of being propagated.
    """
    #
    # Check the processes in the agent cgroup
    #
    processes_check_error = None
    try:
        processes = CGroupConfigurator.get_instance().get_processes_in_agent_cgroup()

        if processes is not None:
            unexpected_processes = sorted(
                command_line for (_, command_line) in processes
                if not CGroupConfigurator.is_agent_process(command_line))

            if unexpected_processes:
                processes_check_error = "The agent's cgroup includes unexpected processes: {0}".format(
                    ustr(unexpected_processes))
    except Exception as e:
        processes_check_error = "Failed to check the processes in the agent's cgroup: {0}".format(
            ustr(e))

    # Report a small sample of errors
    if processes_check_error != self._last_error and self._error_count < 5:
        # Remember the new state even when the error has cleared, so that the same
        # error is reported again if it comes back later.
        self._last_error = processes_check_error

        # A cleared error (None) is not an error: do not log "None", do not emit an
        # event for it, and do not let it use up one of the reported samples.
        if processes_check_error is not None:
            self._error_count += 1
            logger.info(processes_check_error)
            add_event(op=WALAEventOperation.CGroupsDebug,
                      message=processes_check_error)

    #
    # Report metrics
    #
    metrics = CGroupsTelemetry.poll_all_tracked()

    for metric in metrics:
        report_metric(metric.category, metric.counter, metric.instance, metric.value)