def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
    """Poll while memory reads fail and check that only CPU samples accumulate.

    ``MemoryCgroup.get_memory_usage`` is patched to raise, ``CpuCgroup._get_cpu_percent``
    returns a constant and ``CGroup.is_active`` always returns True.
    """
    extension_count = 5
    cpu_percent = 30

    for index in range(extension_count):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_percent_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_usage_mock:
            with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                is_active_mock.return_value = True
                memory_usage_mock.side_effect = Exception("File not found")
                cpu_percent_mock.return_value = cpu_percent

                # After the k-th poll, k CPU samples and no memory samples are expected.
                for polls_so_far in range(1, 10):
                    CGroupsTelemetry.poll_all_tracked()
                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                    self._assert_cgroup_metrics_equal(
                        cpu_usage=[cpu_percent] * polls_so_far, memory_usage=[], max_memory_usage=[])

                CGroupsTelemetry.report_all_tracked()

                # Reporting is expected to keep the per-extension entries but empty their samples.
                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), extension_count)
                self._assert_cgroup_metrics_equal([], [], [])
def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied(self, mock_read_file, patch_periodic_warn, *args):
    """Poll while file reads fail with EPERM and check the number of periodic warnings.

    ``mock_read_file`` is made to raise an ``IOError`` whose ``errno`` is ``errno.EPERM``;
    after every poll the warning mock must have been called ``poll_count * expected_count_per_call`` times.
    """
    num_extensions = 1
    num_controllers = 2
    is_active_check_per_controller = 2

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    # Tracking alone must not warn.
    self.assertEqual(0, patch_periodic_warn.call_count)

    # Expecting logs to be present for different kind of errors
    permission_denied = IOError()
    permission_denied.errno = errno.EPERM
    mock_read_file.side_effect = permission_denied

    poll_count = 1
    # each collect per controller would generate a log statement, and each cgroup would invoke a
    # is active check raising an exception
    expected_count_per_call = num_controllers + is_active_check_per_controller
    expected_warnings = poll_count * expected_count_per_call

    for _ in range(poll_count, 10):
        CGroupsTelemetry.poll_all_tracked()
        self.assertEqual(expected_warnings, patch_periodic_warn.call_count)
def test_cleanup_legacy_cgroups_should_disable_cgroups_when_the_daemon_was_added_to_the_legacy_cgroup_on_systemd(self, _):
    """Check that cleanup_legacy_cgroups() disables cgroups when the daemon PID is in the legacy cgroups.

    The test writes a fake PID file, creates legacy "cpu" and "memory" agent cgroups containing
    that PID, runs the cleanup, and then checks the single failure event that was emitted, that
    the configurator is disabled and that no cgroups remain tracked.
    """
    # Set up a mock /var/run/waagent.pid file
    daemon_pid = "42"
    daemon_pid_file = os.path.join(self.tmp_dir, "waagent.pid")
    fileutil.write_file(daemon_pid_file, daemon_pid + "\n")

    # Set up old controller cgroups and add the daemon PID to them
    CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "cpu", daemon_pid)
    CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "memory", daemon_pid)

    # Start tracking a couple of dummy cgroups
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

    cgroup_configurator = CGroupConfigurator.get_instance()

    with patch("azurelinuxagent.common.cgroupconfigurator.add_event") as mock_add_event:
        with patch("azurelinuxagent.common.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file):
            cgroup_configurator.cleanup_legacy_cgroups()

    # assertEqual is used throughout: assertEquals is a deprecated alias that newer Pythons removed.
    self.assertEqual(len(mock_add_event.call_args_list), 1)
    _, kwargs = mock_add_event.call_args_list[0]
    self.assertEqual(kwargs['op'], 'CGroupsCleanUp')
    self.assertFalse(kwargs['is_success'])
    self.assertEqual(
        kwargs['message'],
        "Failed to process legacy cgroups. Collection of resource usage data will be disabled. [CGroupsException] The daemon's PID ({0}) was already added to the legacy cgroup; this invalidates resource usage data.".format(daemon_pid))

    self.assertFalse(cgroup_configurator.enabled())
    self.assertEqual(len(CGroupsTelemetry._tracked), 0)
def _track_new_extension_cgroups(num_extensions):
    """Create and track one "cpu" and one "memory" dummy cgroup for each of `num_extensions` extensions.

    Cgroup i uses the paths "dummy_cpu_path_i" / "dummy_memory_path_i" and the name "dummy_extension_i".
    """
    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        # The cpu cgroup is tracked before the memory cgroup, as in the per-test setup loops.
        for controller in ("cpu", "memory"):
            cgroup_path = "dummy_{0}_path_{1}".format(controller, index)
            CGroupsTelemetry.track_cgroup(CGroup.create(cgroup_path, controller, extension_name))
def test_is_active_file_not_present(self, patch_periodic_warn):
    """Cgroups created on the "not_*_mount" data directories must be inactive without any periodic warning."""
    missing_mounts = (("not_cpu_mount", "cpu"), ("not_memory_mount", "memory"))

    for directory, controller in missing_mounts:
        cgroup = CGroup.create(os.path.join(data_dir, "cgroups", directory), controller, "test_extension")
        self.assertEqual(False, cgroup.is_active())

    self.assertEqual(0, patch_periodic_warn.call_count)
def test_is_active_incorrect_file(self, patch_periodic_warn):
    """Cgroups whose path is the "tasks" entry itself must be inactive and add one warning each."""
    # (mount directory, controller, cumulative number of warnings expected after the check)
    cases = (("cpu_mount", "cpu", 1), ("memory_mount", "memory", 2))

    for mount, controller, expected_warnings in cases:
        wrong_path = os.path.join(data_dir, "cgroups", mount, "tasks")
        cgroup = CGroup.create(wrong_path, controller, "test_extension")
        self.assertEqual(False, cgroup.is_active())
        self.assertEqual(expected_warnings, patch_periodic_warn.call_count)
def test_telemetry_polling_with_changing_cgroups_state(self, *args):
    """Poll once with active cgroups and once with inactive ones, then report.

    The cgroups must stay tracked after the first poll and be untracked after the second,
    two samples per metric must have been collected, and reporting must clear the metrics.
    """
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    def check_tracking(assertion):
        # Applies assertTrue/assertFalse to the tracking state of every dummy cgroup.
        for index in range(num_extensions):
            assertion(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(index)))
            assertion(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(index)))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = True

                    no_extensions_expected = 0
                    expected_data_count = 2

                    current_cpu = 30
                    current_memory = 209715200  # example 200 MB
                    current_max_memory = 471859200  # example 450 MB

                    cpu_mock.return_value = current_cpu
                    memory_mock.return_value = current_memory
                    max_memory_mock.return_value = current_max_memory

                    check_tracking(self.assertTrue)

                    CGroupsTelemetry.poll_all_tracked()

                    check_tracking(self.assertTrue)
                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                    # Second poll with every cgroup reported as inactive.
                    is_active_mock.return_value = False

                    CGroupsTelemetry.poll_all_tracked()

                    check_tracking(self.assertFalse)
                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)
                    self._assert_cgroup_metrics_equal(
                        cpu_usage=[current_cpu] * expected_data_count,
                        memory_usage=[current_memory] * expected_data_count,
                        max_memory_usage=[current_max_memory] * expected_data_count)

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), no_extensions_expected)
                    self._assert_cgroup_metrics_equal([], [], [])
def test_telemetry_polling_with_inactive_cgroups(self, *args):
    """Poll once with every cgroup reported inactive, then report.

    The cgroups must be untracked after the poll, each metric must hold exactly one sample,
    and reporting must leave no metrics behind.
    """
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = False

                    no_extensions_expected = 0
                    data_count = 1

                    current_cpu = 30
                    current_memory = 209715200  # example 200 MB
                    current_max_memory = 471859200  # example 450 MB

                    cpu_mock.return_value = current_cpu
                    memory_mock.return_value = current_memory
                    max_memory_mock.return_value = current_max_memory

                    for index in range(num_extensions):
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(index)))
                        self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(index)))

                    CGroupsTelemetry.poll_all_tracked()

                    for index in range(num_extensions):
                        self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(index)))
                        self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(index)))

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                    for metric in CGroupsTelemetry._cgroup_metrics.values():
                        memory_usage, max_memory, cpu_usage = metric.get_metrics()

                        self.assertEqual(len(memory_usage._data), data_count)
                        self.assertListEqual(memory_usage._data, [current_memory] * data_count)

                        self.assertEqual(len(max_memory._data), data_count)
                        self.assertListEqual(max_memory._data, [current_max_memory] * data_count)

                        self.assertEqual(len(cpu_usage._data), data_count)
                        self.assertListEqual(cpu_usage._data, [current_cpu] * data_count)

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), no_extensions_expected)

                    # Any metric that is still present must have been emptied.
                    for metric in CGroupsTelemetry._cgroup_metrics.values():
                        memory_usage, max_memory, cpu_usage = metric.get_metrics()
                        self.assertEqual(len(memory_usage._data), 0)
                        self.assertEqual(len(max_memory._data), 0)
                        self.assertEqual(len(cpu_usage._data), 0)
def test_cleanup_legacy_cgroups_should_disable_cgroups_when_it_fails_to_process_legacy_cgroups(self):
    """Check that cleanup_legacy_cgroups() disables cgroups when appending to a cpu cgroup.procs file fails.

    ``fileutil.append_file`` is patched so that any path matching the cpu ``cgroup.procs``
    pattern raises ``OSError(ENOSPC)``. The test then checks the single failure event that was
    emitted, that the configurator is disabled and that no cgroups remain tracked.
    """
    # Set up a mock /var/run/waagent.pid file
    daemon_pid = "42"
    daemon_pid_file = os.path.join(self.tmp_dir, "waagent.pid")
    fileutil.write_file(daemon_pid_file, daemon_pid + "\n")

    # Set up old controller cgroups and add the daemon PID to them
    CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "cpu", daemon_pid)
    CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "memory", daemon_pid)

    # Set up new controller cgroups and add extension handler's PID to them
    CGroupsTools.create_agent_cgroup(self.cgroups_file_system_root, "cpu", "999")
    CGroupsTools.create_agent_cgroup(self.cgroups_file_system_root, "memory", "999")

    # Capture the real function before patching. If the patch below replaces the attribute on
    # this same fileutil module, looking it up inside the mock would call the mock again.
    original_append_file = fileutil.append_file

    def mock_append_file(filepath, contents, **kwargs):
        if re.match(r'/.*/cpu/.*/cgroup.procs', filepath):
            raise OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))
        original_append_file(filepath, contents, **kwargs)

    # Start tracking a couple of dummy cgroups
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

    cgroup_configurator = CGroupConfigurator.get_instance()

    with patch("azurelinuxagent.common.cgroupconfigurator.add_event") as mock_add_event:
        with patch("azurelinuxagent.common.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file):
            with patch("azurelinuxagent.common.cgroupapi.fileutil.append_file", side_effect=mock_append_file):
                cgroup_configurator.cleanup_legacy_cgroups()

    # assertEqual is used throughout: assertEquals is a deprecated alias that newer Pythons removed.
    self.assertEqual(len(mock_add_event.call_args_list), 1)
    _, kwargs = mock_add_event.call_args_list[0]
    self.assertEqual(kwargs['op'], 'CGroupsCleanUp')
    self.assertFalse(kwargs['is_success'])
    self.assertEqual(
        kwargs['message'],
        'Failed to process legacy cgroups. Collection of resource usage data will be disabled. [Errno 28] No space left on device')

    self.assertFalse(cgroup_configurator.enabled())
    self.assertEqual(len(CGroupsTelemetry._tracked), 0)
def test_disable_should_reset_tracked_cgroups(self):
    """After disable(), the configurator must be disabled and CGroupsTelemetry must track nothing."""
    configurator = CGroupConfigurator.get_instance()

    # Start tracking a couple of dummy cgroups
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

    configurator.disable()

    self.assertFalse(configurator.enabled())
    # assertEqual instead of assertEquals: the latter is a deprecated alias that newer Pythons removed.
    self.assertEqual(len(CGroupsTelemetry._tracked), 0)
def test_telemetry_calculations(self, *args):
    """Feed random CPU and memory samples through polling and check the reported summaries.

    For every extension the report must contain "memory" ("cur_mem", "max_mem") and "cpu"
    ("cur_cpu") entries of `num_summarization_values` items each, whose first five items
    equal generate_metric_list() applied to the samples that were fed in.
    """
    num_polls = 10
    num_extensions = 1
    num_summarization_values = 7

    cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

    # only verifying calculations and not validity of the values.
    memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
    max_memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as cpu_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    samples = zip(cpu_percent_values, memory_usage_values, max_memory_usage_values)
                    for cpu_sample, memory_sample, max_memory_sample in samples:
                        is_active_mock.return_value = True
                        cpu_mock.return_value = cpu_sample
                        memory_mock.return_value = memory_sample
                        max_memory_mock.return_value = max_memory_sample
                        CGroupsTelemetry.poll_all_tracked()

                    collected_metrics = CGroupsTelemetry.report_all_tracked()

                    for index in range(num_extensions):
                        name = "dummy_extension_{0}".format(index)

                        self.assertIn(name, collected_metrics)
                        self.assertIn("memory", collected_metrics[name])
                        memory_report = collected_metrics[name]["memory"]
                        self.assertIn("cur_mem", memory_report)
                        self.assertIn("max_mem", memory_report)
                        self.assertEqual(num_summarization_values, len(memory_report["cur_mem"]))
                        self.assertEqual(num_summarization_values, len(memory_report["max_mem"]))

                        self.assertListEqual(generate_metric_list(memory_usage_values),
                                             memory_report["cur_mem"][0:5])
                        self.assertListEqual(generate_metric_list(max_memory_usage_values),
                                             memory_report["max_mem"][0:5])

                        self.assertIn("cpu", collected_metrics[name])
                        cpu_report = collected_metrics[name]["cpu"]
                        self.assertIn("cur_cpu", cpu_report)
                        self.assertEqual(num_summarization_values, len(cpu_report["cur_cpu"]))

                        self.assertListEqual(generate_metric_list(cpu_percent_values),
                                             cpu_report["cur_cpu"][0:5])
def test_correct_creation(self):
    """CGroup.create() must return the class matching the controller and keep the values it was given."""
    expected_classes = (("cpu", CpuCgroup), ("memory", MemoryCgroup))

    for controller, expected_class in expected_classes:
        cgroup = CGroup.create("dummy_path", controller, "test_extension")
        self.assertIsInstance(cgroup, expected_class)
        self.assertEqual(cgroup.controller, controller)
        self.assertEqual(cgroup.path, "dummy_path")
        self.assertEqual(cgroup.name, "test_extension")
def test_disable_should_reset_tracked_cgroups(self):
    """After disable() on a new configurator instance, CGroupsTelemetry must track nothing."""
    # Start tracking a couple of dummy cgroups
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
    CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

    CGroupConfiguratorSystemdTestCase._get_new_cgroup_configurator_instance().disable()

    # assertEqual instead of assertEquals: the latter is a deprecated alias that newer Pythons removed.
    self.assertEqual(len(CGroupsTelemetry._tracked), 0)
def test_generate_extension_metrics_telemetry_dictionary(self, *args):
    """Track dummy cgroups and poll them `num_polls` times with random mocked CPU and memory readings."""
    num_polls = 10
    num_extensions = 1

    cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]

    # only verifying calculations and not validity of the values.
    memory_usage_values = [random.randint(0, 8 * 1024**3) for _ in range(num_polls)]
    max_memory_usage_values = [random.randint(0, 8 * 1024**3) for _ in range(num_polls)]

    # no need to initialize the CPU usage, since we mock get_cpu_usage() below
    with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
        for index in range(num_extensions):
            extension_name = "dummy_extension_{0}".format(index)
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
            CGroupsTelemetry.track_cgroup(
                CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    samples = zip(cpu_percent_values, memory_usage_values, max_memory_usage_values)
                    for cpu_sample, memory_sample, max_memory_sample in samples:
                        is_active_mock.return_value = True
                        cpu_mock.return_value = cpu_sample
                        memory_mock.return_value = memory_sample
                        max_memory_mock.return_value = max_memory_sample
                        CGroupsTelemetry.poll_all_tracked()
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
    """Poll while CPU collection fails and check that only memory samples accumulate.

    ``CpuCgroup.collect`` is patched to raise, the two memory readers return constants and
    ``CGroup.is_active`` always returns True.
    """
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_max_usage") as max_memory_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup._get_memory_usage") as memory_mock:
            with patch("azurelinuxagent.common.cgroup.CpuCgroup.collect") as cpu_collect_mock:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
                    is_active_mock.return_value = True
                    cpu_collect_mock.side_effect = Exception("File not found")

                    current_memory = 209715200  # example 200 MB
                    current_max_memory = 471859200  # example 450 MB
                    memory_mock.return_value = current_memory
                    max_memory_mock.return_value = current_max_memory

                    # After the k-th poll, k memory samples and no CPU samples are expected.
                    for polls_so_far in range(1, 10):
                        CGroupsTelemetry.poll_all_tracked()
                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                        for metric in CGroupsTelemetry._cgroup_metrics.values():
                            memory_usage, max_memory, cpu_usage = metric.get_metrics()

                            self.assertEqual(len(memory_usage._data), polls_so_far)
                            self.assertListEqual(memory_usage._data, [current_memory] * polls_so_far)

                            self.assertEqual(len(max_memory._data), polls_so_far)
                            self.assertListEqual(max_memory._data, [current_max_memory] * polls_so_far)

                            self.assertEqual(len(cpu_usage._data), 0)
                            self.assertListEqual(cpu_usage._data, [])

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

                    for metric in CGroupsTelemetry._cgroup_metrics.values():
                        memory_usage, max_memory, cpu_usage = metric.get_metrics()
                        self.assertEqual(len(memory_usage._data), 0)
                        self.assertEqual(len(max_memory._data), 0)
                        self.assertEqual(len(cpu_usage._data), 0)
def test_is_active(self):
    """A cgroup must be inactive before a PID is written to its "tasks" file and active afterwards."""
    for mount, controller in (("cpu_mount", "cpu"), ("memory_mount", "memory")):
        mount_path = os.path.join(data_dir, "cgroups", mount)
        cgroup = CGroup.create(mount_path, controller, "test_extension")
        self.assertEqual(False, cgroup.is_active())

        # The file is closed before re-checking so the written PID is on disk.
        with open(os.path.join(mount_path, "tasks"), mode="wb") as tasks_file:
            tasks_file.write(str(1000).encode())

        self.assertEqual(True, cgroup.is_active())
def test_cgroup_is_tracked(self, *args):
    """is_tracked() must be True for every tracked dummy path and False for paths that were never tracked."""
    num_extensions = 5
    cpu_paths = ["dummy_cpu_path_{0}".format(index) for index in range(num_extensions)]
    memory_paths = ["dummy_memory_path_{0}".format(index) for index in range(num_extensions)]

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(CGroup.create(cpu_paths[index], "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(CGroup.create(memory_paths[index], "memory", extension_name))

    for cpu_path, memory_path in zip(cpu_paths, memory_paths):
        self.assertTrue(CGroupsTelemetry.is_tracked(cpu_path))
        self.assertTrue(CGroupsTelemetry.is_tracked(memory_path))

    self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path"))
    self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path"))
def test_cgroup_tracking(self, *args):
    """Tracking a cpu and a memory cgroup per extension must register every path and nothing else."""
    num_extensions = 5
    num_controllers = 2
    cpu_paths = ["dummy_cpu_path_{0}".format(index) for index in range(num_extensions)]
    memory_paths = ["dummy_memory_path_{0}".format(index) for index in range(num_extensions)]

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(CGroup.create(cpu_paths[index], "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(CGroup.create(memory_paths[index], "memory", extension_name))

    for cpu_path, memory_path in zip(cpu_paths, memory_paths):
        self.assertTrue(CGroupsTelemetry.is_tracked(cpu_path))
        self.assertTrue(CGroupsTelemetry.is_tracked(memory_path))

    self.assertEqual(num_extensions * num_controllers, len(CGroupsTelemetry._tracked))
def test_get_tracked_processes(self):
    """get_tracked_processes() must return [] at first and ['1000'] once "cgroup.procs" contains that PID."""
    cgroup = CGroup.create(self.tmp_dir, "cpu", "test_extension")
    self.assertListEqual(cgroup.get_tracked_processes(), [])

    procs_path = os.path.join(self.tmp_dir, "cgroup.procs")
    with open(procs_path, mode="wb") as procs_file:
        procs_file.write(str(1000).encode())

    self.assertEqual(['1000'], cgroup.get_tracked_processes())
def test_send_extension_metrics_telemetry_for_unsupported_cgroup(self, patch_periodic_warn, patch_add_metric, *args):
    """Run PollResourceUsageOperation with an "io" cgroup tracked: one warning, no metrics sent."""
    # Appended to the tracked list directly rather than through track_cgroup().
    unsupported_cgroup = CGroup("cgroup_name", "/test/path", "io")
    CGroupsTelemetry._tracked.append(unsupported_cgroup)

    PollResourceUsageOperation().run()

    self.assertEqual(1, patch_periodic_warn.call_count)
    # No metrics should be sent.
    self.assertEqual(0, patch_add_metric.call_count)
def create_cgroup(controller):
    """Create the agent cgroup directory for `controller` if needed, add `pid` to it and record it in `cgroups`.

    `self`, `pid` and `cgroups` are taken from the enclosing scope.
    """
    agent_cgroup_path = FileSystemCgroupsApi._get_agent_cgroup_path(controller)

    directory_exists = os.path.isdir(agent_cgroup_path)
    if not directory_exists:
        FileSystemCgroupsApi._try_mkdir(agent_cgroup_path)
        logger.info("Created cgroup {0}".format(agent_cgroup_path))

    self._add_process_to_cgroup(pid, agent_cgroup_path)

    agent_cgroup = CGroup.create(agent_cgroup_path, controller, VM_AGENT_CGROUP_NAME)
    cgroups.append(agent_cgroup)
def test_telemetry_polling_to_generate_transient_logs_index_error(self, mock_read_file, *args):
    """Poll while file reads return an empty string and check that exactly one periodic warning is logged."""
    num_extensions = 1

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    # Generating a different kind of error (non-IOError) to check the logging.
    # Trying to invoke IndexError during the getParameter call
    mock_read_file.return_value = ''

    with patch("azurelinuxagent.common.logger.periodic_warn") as periodic_warn_mock:
        # called only once at start, and then gets removed from the tracked data.
        expected_call_count = 1

        for _ in range(1, 10):
            CGroupsTelemetry.poll_all_tracked()
            self.assertEqual(expected_call_count, periodic_warn_mock.call_count)
def create_agent_cgroups(self):
    """Return the agent's "cpu" and "memory" cgroups, derived from /proc/self/cgroup.

    The file is scanned for the entry whose second field is "name=systemd"; its third field,
    without leading path separators, names the unit and is joined under CGROUPS_FILE_SYSTEM_ROOT.

    :return: list with the cpu CGroup followed by the memory CGroup
    :raises CGroupsException: if the file cannot be read or parsed, or has no "name=systemd" entry
    """
    try:
        cgroup_unit = None
        cgroup_paths = fileutil.read_file("/proc/self/cgroup")
        for entry in cgroup_paths.splitlines():
            # Split at most twice so that a ':' inside the path field does not truncate it.
            fields = entry.split(':', 2)
            if fields[1] == "name=systemd":
                cgroup_unit = fields[2].lstrip(os.path.sep)

        # Fail with an explicit message instead of letting os.path.join() choke on None.
        if cgroup_unit is None:
            raise ValueError("no 'name=systemd' entry found in /proc/self/cgroup")

        cpu_cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, 'cpu', cgroup_unit)
        memory_cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, 'memory', cgroup_unit)

        return [
            CGroup.create(cpu_cgroup_path, 'cpu', cgroup_unit),
            CGroup.create(memory_cgroup_path, 'memory', cgroup_unit)
        ]
    except Exception as e:
        # Every failure is reported to the caller as a CGroupsException.
        raise CGroupsException("Failed to get paths of agent's cgroups. Error: {0}".format(ustr(e)))
def test_telemetry_polling_to_not_generate_transient_logs_ioerror_file_not_found(self, mock_read_file, patch_periodic_warn, *args):
    """Poll while file reads fail with ENOENT and check that no periodic warning is logged."""
    num_extensions = 1

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name)
        CGroupsTelemetry.track_cgroup(cpu_cgroup)
        memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name)
        CGroupsTelemetry.track_cgroup(memory_cgroup)

    # Tracking alone must not warn.
    self.assertEqual(0, patch_periodic_warn.call_count)

    # Not expecting logs present for io_error with errno=errno.ENOENT
    file_not_found = IOError()
    file_not_found.errno = errno.ENOENT
    mock_read_file.side_effect = file_not_found

    poll_count = 1

    for _ in range(poll_count, 10):
        CGroupsTelemetry.poll_all_tracked()
        self.assertEqual(0, patch_periodic_warn.call_count)
def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args):
    """report_all_tracked() must return nothing when _process_cgroup_metric yields {} for inactive cgroups."""
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    process_metric_target = "azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric"
    with patch(process_metric_target) as process_metric_mock:
        with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as is_active_mock:
            is_active_mock.return_value = False
            process_metric_mock.return_value = {}

            poll_count = 1

            for _ in range(poll_count, 10):
                CGroupsTelemetry.poll_all_tracked()

            collected_metrics = CGroupsTelemetry.report_all_tracked()
            self.assertEqual(0, len(collected_metrics))
def test_process_cgroup_metric_with_incorrect_cgroups_mounted(self, *args):
    """With CPU and memory reads both raising, every extension's processed metric must be an empty dict."""
    num_extensions = 5

    for index in range(num_extensions):
        extension_name = "dummy_extension_{0}".format(index)
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_cpu_path_{0}".format(index), "cpu", extension_name))
        CGroupsTelemetry.track_cgroup(
            CGroup.create("dummy_memory_path_{0}".format(index), "memory", extension_name))

    with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as cpu_usage_mock:
        with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as memory_usage_mock:
            cpu_usage_mock.side_effect = Exception("File not found")
            memory_usage_mock.side_effect = Exception("File not found")

            for _ in range(1, 10):
                CGroupsTelemetry.poll_all_tracked()
                self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions)

            collected_metrics = {}
            for name, cgroup_metrics in CGroupsTelemetry._cgroup_metrics.items():
                processed = CGroupsTelemetry._process_cgroup_metric(cgroup_metrics)
                collected_metrics[name] = processed
                self.assertEqual(collected_metrics[name], {})  # empty
def test_send_extension_metrics_telemetry_for_unsupported_cgroup(self, patch_periodic_warn, patch_add_metric, *args):
    """Poll telemetry through the monitor handler with an "io" cgroup tracked: one warning, no metrics sent."""
    # Appended to the tracked list directly rather than through track_cgroup().
    unsupported_cgroup = CGroup("cgroup_name", "/test/path", "io")
    CGroupsTelemetry._tracked.append(unsupported_cgroup)

    monitor_handler = get_monitor_handler()
    monitor_handler.init_protocols()

    # Both "last run" timestamps are moved one hour into the past before polling.
    one_hour = timedelta(hours=1)
    monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - one_hour
    monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - one_hour

    monitor_handler.poll_telemetry_metrics()

    self.assertEqual(1, patch_periodic_warn.call_count)
    # No metrics should be sent.
    self.assertEqual(0, patch_add_metric.call_count)

    monitor_handler.stop()
def create_cgroup(controller):
    """Best-effort creation of the agent cgroup for `controller`; any failure is logged as a warning.

    On success `pid` is added to the cgroup and the CGroup is appended to `cgroups`
    (`self`, `pid` and `cgroups` are taken from the enclosing scope).
    """
    try:
        agent_cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, VM_AGENT_CGROUP_NAME)

        directory_exists = os.path.isdir(agent_cgroup_path)
        if not directory_exists:
            FileSystemCgroupsApi._try_mkdir(agent_cgroup_path)
            logger.info("Created cgroup {0}".format(agent_cgroup_path))

        self._add_process_to_cgroup(pid, agent_cgroup_path)

        agent_cgroup = CGroup.create(agent_cgroup_path, controller, VM_AGENT_CGROUP_NAME)
        cgroups.append(agent_cgroup)
    except Exception as error:
        warning = 'Cannot create "{0}" cgroup for the agent. Error: {1}'.format(controller, ustr(error))
        logger.warn(warning)
def create_cgroup(controller):
    """Append to `cgroups` the CGroup for `controller` located under system.slice/<slice_name>.

    `slice_name`, `extension_name` and `cgroups` are taken from the enclosing scope.
    """
    # The path is built for whichever controller is passed in, not only "cpu".
    controller_cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, 'system.slice', slice_name)
    extension_cgroup = CGroup.create(controller_cgroup_path, controller, extension_name)
    cgroups.append(extension_cgroup)
def _create_extension_cgroup(self, controller, extension_name):
    """Return the CGroup for `extension_name` under `controller`, at the path given by _get_extension_cgroup_path()."""
    extension_cgroup_path = self._get_extension_cgroup_path(controller, extension_name)
    return CGroup.create(extension_cgroup_path, controller, extension_name)