def _initialize(self) -> Optional[int]:
    """Check privileges, RDT availability and prepare internal state.

    Steps, in order:
    - verify that the process has sufficient privileges,
    - resolve ``self._rdt_enabled`` (auto discovery when it is ``None``),
    - call ``self._initialize_rdt()`` when RDT ends up enabled,
    - collect platform information and filter event names for the CPU,
    - require RDT monitoring when RDT is enabled,
    - create ``self._containers_manager``.

    Returns:
        ``None`` on success, or ``1`` as an error code that should stop Runner.
    """
    if not security.are_privileges_sufficient():
        # Adjacent literals are concatenated verbatim, so every fragment
        # (except the last) must end with a space.
        log.error(
            "Insufficient privileges! "
            "Impossible to use perf_event_open/resctrl subsystems. "
            "For unprivileged user it is needed to: "
            "adjust /proc/sys/kernel/perf_event_paranoid (set to -1), "
            "has CAP_DAC_OVERRIDE and CAP_SETUID capabilities and "
            "SECBIT_NO_SETUID_FIXUP secure bit set.")
        return 1

    # Initialization (auto discovery of Intel RDT features).
    rdt_available = resctrl.check_resctrl()
    if self._rdt_enabled is None:
        # Not configured explicitly: follow what the system offers.
        self._rdt_enabled = rdt_available
        log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
    elif self._rdt_enabled is True and not rdt_available:
        log.error('RDT explicitly enabled but not available - exiting!')
        return 1

    if self._rdt_enabled:
        # Resctrl is enabled and available, call a placeholder to allow
        # further initialization.
        rdt_initialization_ok = self._initialize_rdt()
        if not rdt_initialization_ok:
            return 1

    # Postpone the container manager initialization until after the RDT
    # checks were performed.
    # NOTE(review): the topology values are not used below; the call is kept
    # in case it has side effects - confirm and drop if it does not.
    platform_cpus, _, platform_sockets = platforms.collect_topology_information()

    platform, _, _ = platforms.collect_platform_information(self._rdt_enabled)
    rdt_information = platform.rdt_information

    self._event_names = _filter_out_event_names_for_cpu(
        self._event_names, platform.cpu_codename)

    # We currently do not support RDT without monitoring.
    if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
        # CMT/MBM are the monitoring features (CAT is an allocation feature).
        log.error('RDT monitoring is required - please enable CMT '
                  'or MBM with kernel parameters!')
        return 1

    self._containers_manager = ContainerManager(
        platform=platform,
        allocation_configuration=self._allocation_configuration,
        event_names=self._event_names,
        enable_derived_metrics=self._enable_derived_metrics,
    )
    return None
def _initialize(self) -> Optional[int]:
    """Check RDT availability, privileges and prepare internal state.

    Steps, in order:
    - resolve ``self._rdt_enabled`` (auto discovery when it is ``None``),
    - verify privileges for the features that will actually be used,
    - call ``self._initialize_rdt_callback`` (if set) when RDT is enabled,
    - collect platform information and filter event names for the CPU,
    - require RDT monitoring when RDT is enabled,
    - create ``self._containers_manager`` and initialize uncore PMU.

    Returns:
        ``None`` on success, or ``1`` as an error code that should stop Runner.
    """
    # Initialization (auto discovery of Intel RDT features).
    rdt_available = resctrl.check_resctrl()
    if self._rdt_enabled is None:
        # Not configured explicitly: follow what the system offers.
        self._rdt_enabled = rdt_available
        log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
    elif self._rdt_enabled is True and not rdt_available:
        log.error('RDT explicitly enabled but not available - exiting!')
        return 1

    # _allocation_configuration is set in allocation mode (AllocationRunner)
    # so we need access to write in cgroups.
    write_to_cgroup = self._allocation_configuration is not None
    use_resctrl = self._rdt_enabled
    use_perf = bool(self._event_names)

    if not security.are_privileges_sufficient(write_to_cgroup, use_resctrl, use_perf):
        return 1

    # Resctrl is enabled and available, call a placeholder to allow further
    # initialization. For MeasurementRunner there is nothing to configure in
    # RDT to measure resource usage, so the callback is only run when another
    # runner reusing this functionality has provided one.
    if self._rdt_enabled and self._initialize_rdt_callback is not None:
        rdt_initialization_ok = self._initialize_rdt_callback()
        if not rdt_initialization_ok:
            return 1

    log.debug('rdt_enabled: %s', self._rdt_enabled)
    log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
    platform, _, _ = platforms.collect_platform_information(
        self._rdt_enabled, gather_hw_mm_topology=self._gather_hw_mm_topology
    )
    rdt_information = platform.rdt_information

    self._event_names = _filter_out_event_names_for_cpu(
        self._event_names, platform.cpu_codename)

    # We currently do not support RDT without monitoring.
    if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
        # CMT/MBM are the monitoring features (CAT is an allocation feature).
        log.error('RDT monitoring is required - please enable CMT '
                  'or MBM with kernel parameters!')
        return 1

    self._containers_manager = ContainerManager(
        platform=platform,
        allocation_configuration=self._allocation_configuration,
        event_names=self._event_names,
        enable_derived_metrics=self._enable_derived_metrics,
        wss_reset_interval=self._wss_reset_interval,
        perf_aggregate_cpus=self._perf_aggregate_cpus,
    )

    self._init_uncore_pmu(self._enable_derived_metrics, self._enable_perf_uncore, platform)

    return None
def _initialize(self) -> Optional[int]:
    """Check RDT availability, privileges and prepare internal state.

    Can return error code that should stop Runner.

    Flow:
    - Conclude requirements based on configuration
    - Conclude required features based on auto discovery
    - Confront user expectations from configuration file with resctrl fs
      and security access
    - Check RDT HW monitoring features availability

    Returns:
        ``None`` on success, or ``1`` as an error code that should stop Runner.
    """
    resctrl_available = resctrl.check_resctrl()

    # If enabled explicitly, check resctrl availability right now.
    if self._rdt_enabled is True and not resctrl_available:
        log.error('RDT explicitly enabled but resctrl fs not available - exiting!')
        return 1

    # Auto discovery of Intel RDT features.
    if self._rdt_enabled is None:
        # Temporarily follow resctrl availability - monitoring and access
        # are verified later and may still turn RDT off.
        log.debug('Enable RDT auto discovery (resctrl availability=%s)', resctrl_available)
        self._rdt_enabled = resctrl_available
        rdt_auto_enabling = True
    else:
        rdt_auto_enabling = False

    log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
    platform, _, _ = platforms.collect_platform_information(
        resctrl_available, gather_hw_mm_topology=self._gather_hw_mm_topology
    )

    # Confront RDT (resctrl fs) with HW enabled monitoring features.
    if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled():
        # Note: WCA does not support RDT without monitoring (keeps a mapping of
        # cgroups and resctrl groups).
        # Adjacent literals are concatenated verbatim, so each fragment must
        # carry its own separating space.
        msg = ('Resctrl is available but RDT monitoring features are not! '
               'Please enable CMT or MBM with kernel parameters (monitoring is '
               'required for CAT or MBA allocation)!')
        if rdt_auto_enabling:
            # Auto discovery: silently fall back to running without RDT.
            log.debug(msg)
            self._rdt_enabled = False
            # Override: RDT information should not be available later,
            # e.g. in ContainerManager.
            platform.rdt_information = None
        else:
            # RDT was requested explicitly - fail fast here.
            log.error(msg)
            return 1

    # All RDT checks (security/check) done - show info and call initialization callback.
    log.info('RDT: %s %s',
             'enabled' if self._rdt_enabled else 'disabled',
             ' (auto discovery)' if rdt_auto_enabling else '',
             )

    # Event names (perf cgroups).
    self._event_names = filter_out_event_names_for_cpu(
        self._event_names, platform.cpu_codename)
    log.info('Enabling %i perf events (for cgroups).', len(self._event_names))
    log.debug('Enabling perf events: %s', ', '.join(self._event_names))

    # Check requested events against the assumed number of available HW counters.
    if self._event_names:
        if not check_perf_event_count_limit(self._event_names, platform.cpus, platform.cores):
            return 1

    # _allocation_configuration is set in allocation mode (AllocationRunner)
    # so we need access to write in cgroups.
    write_to_cgroup = self._allocation_configuration is not None
    use_perf = bool(self._event_names)

    # Check we have enough access.
    if not security.are_privileges_sufficient(write_to_cgroup, self._rdt_enabled, use_perf):
        return 1

    # Resctrl is enabled and available, call a placeholder to allow further initialization.
    # For "measurement mode" there is nothing to configure in RDT.
    # The callback is only set when "MeasurementRunner" is used as a component
    # of other runners (e.g. Allocation) that need specific RDT initialization.
    if self._rdt_enabled and self._initialize_rdt_callback is not None:
        rdt_initialization_ok = self._initialize_rdt_callback()
        if not rdt_initialization_ok:
            return 1

    self._containers_manager = ContainerManager(
        platform=platform,
        allocation_configuration=self._allocation_configuration,
        event_names=self._event_names,
        enable_derived_metrics=self._enable_derived_metrics,
        wss_reset_cycles=self._wss_reset_cycles,
        wss_stable_cycles=self._wss_stable_cycles,
        wss_membw_threshold=self._wss_membw_threshold,
        perf_aggregate_cpus=self._perf_aggregate_cpus,
        interval=self._interval,
        sched=self._sched,
    )
    log.log(TRACE, 'container manager config: %s', self._containers_manager.__dict__)

    self._init_uncore_pmu_events(self._enable_derived_metrics, self._uncore_events, platform)

    return None