Beispiel #1
0
    def _initialize(self) -> Optional[int]:
        """Check privileges, RDT availability and prepare internal state.

        Steps, in order:

        - verify that the process has sufficient privileges,
        - resolve ``self._rdt_enabled`` (auto-detected from resctrl
          availability when it is ``None``),
        - call ``self._initialize_rdt()`` when RDT is enabled,
        - collect platform information and filter ``self._event_names`` for
          the detected CPU codename,
        - require RDT monitoring whenever RDT is enabled,
        - create ``self._containers_manager``.

        Returns:
            ``None`` on success, or ``1`` as an error code that should stop
            the Runner.
        """
        if not security.are_privileges_sufficient():
            # Adjacent literals are concatenated verbatim, so every fragment
            # (except the last) must end with a separating space.
            log.error(
                "Insufficient privileges! "
                "Impossible to use perf_event_open/resctrl subsystems. "
                "For unprivileged user it is needed to: "
                "adjust /proc/sys/kernel/perf_event_paranoid (set to -1), "
                "has CAP_DAC_OVERRIDE and CAP_SETUID capabilities and "
                "SECBIT_NO_SETUID_FIXUP secure bit set.")
            return 1

        # Initialization (auto discovery Intel RDT features).
        rdt_available = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = rdt_available
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not rdt_available:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        if self._rdt_enabled:
            # Resctrl is enabled and available, call a placeholder to allow
            # further initialization.
            if not self._initialize_rdt():
                return 1

        # Postpone the container manager initialization after rdt checks were performed.
        # NOTE(review): the topology result is not used anywhere in this method;
        # the call is kept in case it has side effects - confirm and remove if not.
        platforms.collect_topology_information()

        platform, _, _ = platforms.collect_platform_information(
            self._rdt_enabled)
        rdt_information = platform.rdt_information

        self._event_names = _filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
            # CMT and MBM are the RDT *monitoring* features (CAT is allocation).
            log.error('RDT monitoring is required - please enable CMT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
        )
        return None
    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Steps, in order:

        - resolve ``self._rdt_enabled`` (auto-detected from resctrl
          availability when it is ``None``),
        - verify privileges for the features that will actually be used
          (cgroup writes, resctrl, perf),
        - call ``self._initialize_rdt_callback`` when RDT is enabled and a
          callback was provided,
        - collect platform information and filter ``self._event_names`` for
          the detected CPU codename,
        - require RDT monitoring whenever RDT is enabled,
        - create ``self._containers_manager`` and initialize uncore PMU.

        Returns:
            ``None`` on success, or ``1`` as an error code that should stop
            the Runner.
        """

        # Initialization (auto discovery Intel RDT features).
        rdt_available = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = rdt_available
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not rdt_available:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        # _allocation_configuration is set in allocation mode (AllocationRunner)
        # so we need access to write in cgroups.
        write_to_cgroup = self._allocation_configuration is not None
        use_resctrl = self._rdt_enabled
        use_perf = len(self._event_names) > 0

        if not security.are_privileges_sufficient(write_to_cgroup, use_resctrl, use_perf):
            return 1

        # Resctrl is enabled and available, call a placeholder to allow further
        # initialization. For MeasurementRunner there is nothing to configure in
        # RDT to measure resource usage, so the callback is optional: it is only
        # set when MeasurementRunner functionality is used by another runner.
        if self._rdt_enabled and self._initialize_rdt_callback is not None:
            if not self._initialize_rdt_callback():
                return 1

        log.debug('rdt_enabled: %s', self._rdt_enabled)
        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        platform, _, _ = platforms.collect_platform_information(
            self._rdt_enabled,
            gather_hw_mm_topology=self._gather_hw_mm_topology
        )
        rdt_information = platform.rdt_information

        self._event_names = _filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
            # CMT and MBM are the RDT *monitoring* features (CAT is allocation).
            log.error('RDT monitoring is required - please enable CMT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_interval=self._wss_reset_interval,
            perf_aggregate_cpus=self._perf_aggregate_cpus
        )

        self._init_uncore_pmu(self._enable_derived_metrics, self._enable_perf_uncore, platform)

        return None
Beispiel #3
0
    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Flow:
        - Conclude requirements based on configuration
        - Conclude required features based on auto discovery
        - Confront user expectations from configuration file with resctrl fs
          and security access
        - Check RDT HW monitoring features availability
        - Filter perf event names for the CPU and check the event count limit
        - Run the optional RDT initialization callback
        - Create ``self._containers_manager`` and initialize uncore PMU events

        Returns:
            ``None`` on success, or ``1`` as an error code that should stop
            the Runner.
        """
        resctrl_available = resctrl.check_resctrl()
        # If enabled explicitly check resctrl availability right now.
        if self._rdt_enabled is True and not resctrl_available:
            log.error('RDT explicitly enabled but resctrl fs not available - exiting!')
            return 1

        # Auto discovery Intel RDT features.
        rdt_auto_enabling = self._rdt_enabled is None
        if rdt_auto_enabling:
            # Temporarily follow resctrl availability - monitoring/access are
            # checked later and may still switch RDT off.
            log.debug('Enable RDT auto discovery (resctrl availability=%s)', resctrl_available)
            self._rdt_enabled = resctrl_available

        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        # NOTE(review): platform information is collected based on resctrl
        # availability, not on self._rdt_enabled - confirm this is intended when
        # RDT is explicitly disabled in configuration.
        platform, _, _ = platforms.collect_platform_information(
            resctrl_available,
            gather_hw_mm_topology=self._gather_hw_mm_topology
        )

        # Confront RDT (resctrl fs) with HW enabled monitoring features.
        if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled():
            # Note: WCA does not support RDT without monitoring (keeps a mapping of
            # cgroups and resctrl groups).
            msg = ('Resctrl is available but RDT monitoring features are not! '
                   'Please enable CMT or MBM with kernel parameters (monitoring is '
                   'required for CAT or MBA allocation)!')
            if not rdt_auto_enabling:
                # RDT was explicitly requested - fail fast here.
                log.error(msg)
                return 1
            log.debug(msg)
            self._rdt_enabled = False
            # Clear RDT information so it is not available to later consumers,
            # e.g. ContainerManager.
            platform.rdt_information = None

        # All RDT checks (security/check) done - show info and call initialization callback.
        log.info('RDT: %s %s', 'enabled' if self._rdt_enabled else 'disabled',
                 ' (auto discovery)' if rdt_auto_enabling else '',
                 )

        # Event names (perf cgroups)
        self._event_names = filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        log.info('Enabling %i perf events (for cgroups).', len(self._event_names))
        log.debug('Enabling perf events: %s', ', '.join(self._event_names))
        # Check the requested events against the assumed (most common) number of
        # available HW counters; skipped when no events are requested.
        if self._event_names and not check_perf_event_count_limit(
                self._event_names, platform.cpus, platform.cores):
            return 1

        # _allocation_configuration is set in allocation mode (AllocationRunner)
        # so we need access to write in cgroups.
        write_to_cgroup = self._allocation_configuration is not None
        use_perf = len(self._event_names) > 0
        # Check we have enough access.
        if not security.are_privileges_sufficient(write_to_cgroup, self._rdt_enabled, use_perf):
            return 1

        # Resctrl is enabled and available, call a placeholder to allow further initialization.
        # For "measurement mode" there is nothing to configure in RDT, so the
        # callback is optional: it is only set when "MeasurementRunner" is used as
        # component functionality in other runners e.g. Allocation.
        if self._rdt_enabled and self._initialize_rdt_callback is not None:
            if not self._initialize_rdt_callback():
                return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_cycles=self._wss_reset_cycles,
            wss_stable_cycles=self._wss_stable_cycles,
            wss_membw_threshold=self._wss_membw_threshold,
            perf_aggregate_cpus=self._perf_aggregate_cpus,
            interval=self._interval,
            sched=self._sched,
        )
        log.log(TRACE, 'container manager config: %s', self._containers_manager.__dict__)

        self._init_uncore_pmu_events(self._enable_derived_metrics, self._uncore_events, platform)

        return None