def init_cgroups():
    """
    Start tracking metrics for the roll-up cgroup and for the agent cgroup.

    Any exception raised while setting up tracking is caught and logged as an
    error together with its traceback; nothing is propagated to the caller.
    """
    try:
        # An empty extension name selects the roll-up (wrapper) cgroup.
        rollup_cgroup = CGroups.for_extension("")
        CGroupsTelemetry.track_cgroup(rollup_cgroup)
        CGroupsTelemetry.track_agent()
    except Exception as error:
        stack_trace = traceback.format_exc()
        logger.error("monitor: Exception tracking wrapper and agent: {0} [{1}]", error, stack_trace)
def init_cgroups():
    """
    Register the roll-up cgroup and the agent cgroup with CGroupsTelemetry.

    Failures are not re-raised: the exception and its formatted traceback are
    written to the log at error level instead.
    """
    failure_format = "monitor: Exception tracking wrapper and agent: {0} [{1}]"
    try:
        # Roll-up cgroup first (empty extension name), then the agent itself.
        CGroupsTelemetry.track_cgroup(CGroups.for_extension(""))
        CGroupsTelemetry.track_agent()
    except Exception as exc:
        logger.error(failure_format, exc, traceback.format_exc())
def init_cgroups():
    """
    Create the wrapper cgroup and start tracking metrics for the agent cgroup.

    The wrapper cgroup (/sys/fs/cgroup/{cpu,memory}/WALinuxAgent/) is created
    but deliberately not tracked: it only serves as an umbrella for the agent
    and extension cgroups.

    Failures are logged as a warning rather than an error, because an
    exception is raised whenever a hierarchy is not mounted, which is not an
    unexpected situation. The traceback is logged at verbose level.
    """
    try:
        # Creating the umbrella cgroup is the only purpose of this call; the
        # returned object is intentionally discarded.
        CGroups.for_extension("")

        # Agent cgroup (daemon and extension handler):
        # /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent
        # On systemd systems it would already exist under /system.slice.
        CGroupsTelemetry.track_agent()
    except Exception as error:
        logger.warn("Monitor: cgroups not initialized: {0}", ustr(error))
        logger.verbose(traceback.format_exc())
def init_cgroups():
    """
    Start tracking metrics for the roll-up cgroup and for the agent cgroup.

    An exception is raised when a cgroup hierarchy is not mounted; since that
    is not unexpected, any failure is reported only as a warning, with the
    traceback emitted at verbose level. Nothing is propagated to the caller.
    """
    try:
        # An empty extension name selects the roll-up cgroup.
        wrapper_cgroup = CGroups.for_extension("")
        CGroupsTelemetry.track_cgroup(wrapper_cgroup)
        CGroupsTelemetry.track_agent()
    except Exception as err:
        reason = ustr(err)
        logger.warn("Monitor: cgroups not initialized: {0}", reason)
        logger.verbose(traceback.format_exc())
def launch_command(self, cmd, timeout=300, extension_error_code=1000, env=None):
    """
    Launch an extension command in a shell and report its outcome.

    The command is resolved relative to the handler's base directory, started
    in its own session inside the extension's cgroup, and its output is
    collected through capture_from_process().

    :param cmd: command line, relative to the handler's base directory
    :param timeout: value passed through to capture_from_process()
    :param extension_error_code: code attached to any ExtensionError raised here
    :param env: optional mapping of extra environment variables; it is copied,
        never modified
    :raises ExtensionError: if the process cannot be launched, has not
        terminated once output capture returns, or exits with a non-zero code
    """
    begin_utc = datetime.datetime.utcnow()
    self.logger.verbose("Launch command: [{0}]", cmd)
    base_dir = self.get_base_dir()

    # Build the child's environment on a private copy so that the dict passed
    # in by the caller is not mutated as a side effect of launching a command.
    # NOTE(review): for duplicate keys the values from os.environ take
    # precedence over the caller-supplied ones -- confirm this is intended.
    child_env = dict(env) if env is not None else {}
    child_env.update(os.environ)

    # Some extensions erroneously begin cmd with a slash; don't interpret those
    # as root-relative. (Issue #1170)
    full_path = os.path.join(base_dir, cmd.lstrip(os.path.sep))

    def pre_exec_function():
        """
        Change process state before the actual target process is started. Effectively, this runs between
        the fork() and the exec() of sub-process creation.
        :return:
        """
        os.setsid()
        CGroups.add_to_extension_cgroup(self.ext_handler.name)

    try:
        # This should be .run(), but due to the wide variety
        # of Python versions we must support we must use .communicate().
        process = subprocess.Popen(full_path,
                                   shell=True,
                                   cwd=base_dir,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   env=child_env,
                                   preexec_fn=pre_exec_function)
    except OSError as e:
        raise ExtensionError("Failed to launch '{0}': {1}".format(full_path, e.strerror),
                             code=extension_error_code)

    # Register the extension's cgroup with telemetry once the process exists.
    cg = CGroups.for_extension(self.ext_handler.name)
    CGroupsTelemetry.track_extension(self.ext_handler.name, cg)
    msg = capture_from_process(process, cmd, timeout, extension_error_code)

    ret = process.poll()
    if ret is None:
        raise ExtensionError("Process {0} was not terminated: {1}\n{2}".format(process.pid, cmd, msg),
                             code=extension_error_code)
    if ret != 0:
        raise ExtensionError("Non-zero exit code: {0}, {1}\n{2}".format(ret, cmd, msg),
                             code=extension_error_code)

    duration = elapsed_milliseconds(begin_utc)
    # Drop empty lines from the captured output before logging/reporting it.
    log_msg = "{0}\n{1}".format(cmd, "\n".join([line for line in msg.split('\n') if line != ""]))
    self.logger.verbose(log_msg)
    self.report_event(message=log_msg, duration=duration, log_event=False)
def assert_limits(self, _, patch_set_cpu, patch_set_memory_limit, patch_get_enforce, patch_add_event, ext_name,
                  expected_cpu_limit, limits_enforced=True, exception_raised=False):
    """
    Call set_limits() on the cgroup for ext_name and verify what the patched
    collaborators observed.

    Limits are expected to be applied only when expected_cpu_limit is positive;
    in that case the CPU value, the memory lower bound and the contents of the
    emitted event are checked as well. When exception_raised is true, the
    memory-limit patch is made to raise CGroupsException and set_limits() is
    expected to propagate it.
    """
    should_limit = expected_cpu_limit > 0
    patch_get_enforce.return_value = limits_enforced

    if exception_raised:
        patch_set_memory_limit.side_effect = CGroupsException('set_memory_limit error')

    try:
        CGroups.for_extension(ext_name).set_limits()
    except CGroupsException:
        if not exception_raised:
            self.fail('exception not expected')
    else:
        if exception_raised:
            self.fail('exception expected')

    # Each patched collaborator is invoked exactly when limits apply.
    for patched in (patch_set_cpu, patch_set_memory_limit, patch_add_event):
        self.assertEqual(should_limit, patched.called)

    if should_limit:
        cpu_limit_used = patch_set_cpu.call_args[0][0]
        memory_limit_used = patch_set_memory_limit.call_args[0][0]
        event_kwargs = patch_add_event.call_args[1]

        self.assertEqual(expected_cpu_limit, cpu_limit_used)
        self.assertTrue(memory_limit_used >= DEFAULT_MEM_LIMIT_MIN_MB)
        self.assertEqual(event_kwargs['op'], 'SetCGroupsLimits')
        self.assertEqual(event_kwargs['is_success'], not exception_raised)

        event_message = event_kwargs['message']
        self.assertTrue('{0}%'.format(expected_cpu_limit) in event_message)
        self.assertTrue(ext_name in event_message)
        self.assertEqual(exception_raised, 'set_memory_limit error' in event_message)
def test_telemetry_instantiation_as_superuser(self):
    """
    Tracking a new cgroup for an extension; collect all metrics.

    The current process is moved into a dedicated test cgroup, the telemetry
    checks are run against it, and the process is then moved back.
    """
    extension_name = "agent_unittest"

    # Remember where the process started so it can be put back afterwards
    original_cgroup = make_self_cgroups()

    # Move the process into a different cgroup, consume some resources, ensure we see them end-to-end
    unittest_cgroup = CGroups.for_extension(extension_name)
    own_pid = os.getpid()
    unittest_cgroup.add(own_pid)
    for controller in ('cpu', 'memory'):
        self.assertNotEqual(original_cgroup.cgroups[controller], unittest_cgroup.cgroups[controller])

    self.exercise_telemetry_instantiation(unittest_cgroup)

    # Put everything back the way it was
    CGroupsTelemetry.stop_tracking(extension_name)
    original_cgroup.add(own_pid)
def assert_limits(self, _, patch_set_cpu, patch_set_memory_limit, patch_get_enforce, patch_add_event, ext_name,
                  expected_cpu_limit, limits_enforced=True, exception_raised=False):
    """
    Verify the effect of set_limits() for the cgroup of the given extension.

    A positive expected_cpu_limit means the CPU setter, the memory setter and
    the event reporter must all have been called; otherwise none of them may
    have been. exception_raised makes the memory setter raise a
    CGroupsException, which set_limits() must then let through.
    """
    should_limit = expected_cpu_limit > 0
    patch_get_enforce.return_value = limits_enforced

    if exception_raised:
        injected_error = CGroupsException('set_memory_limit error')
        patch_set_memory_limit.side_effect = injected_error

    try:
        cgroup = CGroups.for_extension(ext_name)
        cgroup.set_limits()
    except CGroupsException:
        if not exception_raised:
            self.fail('exception not expected')
    else:
        if exception_raised:
            self.fail('exception expected')

    self.assertEqual(should_limit, patch_set_cpu.called)
    self.assertEqual(should_limit, patch_set_memory_limit.called)
    self.assertEqual(should_limit, patch_add_event.called)

    if not should_limit:
        # Nothing was applied, so there are no call arguments to inspect.
        return

    applied_cpu = patch_set_cpu.call_args[0][0]
    applied_memory = patch_set_memory_limit.call_args[0][0]
    reported = patch_add_event.call_args[1]

    self.assertEqual(expected_cpu_limit, applied_cpu)
    self.assertTrue(applied_memory >= DEFAULT_MEM_LIMIT_MIN_MB)
    self.assertEqual(reported['op'], 'SetCGroupsLimits')
    self.assertEqual(reported['is_success'], not exception_raised)

    expected_cpu_text = '{0}%'.format(expected_cpu_limit)
    self.assertTrue(expected_cpu_text in reported['message'])
    self.assertTrue(ext_name in reported['message'])
    self.assertEqual(exception_raised, 'set_memory_limit error' in reported['message'])