def normalize_mb_string(mb: str, platform_sockets: int, mb_min_bandwidth: int,
                        mb_bandwidth_gran: int) -> str:
    """Return the ``MB:`` schemata row with every domain value normalized.

    The row is parsed with ``_parse_schemata_file_row``, the domain ids are
    checked with ``_validate_domains`` and each value is passed through
    ``_normalize_mb_value``.

    :param mb: schemata row; must start with the ``MB:`` prefix.
    :param platform_sockets: forwarded to ``_validate_domains``.
    :param mb_min_bandwidth: forwarded to ``_normalize_mb_value``; must not be None.
    :param mb_bandwidth_gran: forwarded to ``_normalize_mb_value``; must not be None.
    :return: ``'MB:'`` followed by ``domain=value`` entries joined with ``;``.
    :raises InvalidAllocations: when the prefix is missing or a value is not
        an integer (helpers may raise it as well).
    """
    assert mb_min_bandwidth is not None
    assert mb_bandwidth_gran is not None

    if not mb.startswith('MB:'):
        raise InvalidAllocations(
            'mb resources setting should start with "MB:" prefix (got %r)' % mb)

    domains = _parse_schemata_file_row(mb)
    _validate_domains(domains, platform_sockets)

    normalized_entries = []
    for domain, raw_value in domains.items():
        try:
            mb_value = int(raw_value)
        except ValueError as e:
            raise InvalidAllocations(
                "{} is not integer format".format(raw_value)) from e
        normalized_mb_value = _normalize_mb_value(mb_value, mb_min_bandwidth,
                                                  mb_bandwidth_gran)
        normalized_entries.append('{}={}'.format(domain, normalized_mb_value))

    # Joining (instead of appending ';' and cutting the last character) keeps
    # the 'MB:' prefix intact even when no domain entries were parsed.
    return 'MB:' + ';'.join(normalized_entries)
def check_cbm_mask(mask: str, cbm_mask: str, min_cbm_bits: str):
    """Validate a hexadecimal cache bit mask.

    :param mask: requested mask, parsed as base-16.
    :param cbm_mask: largest allowed mask, parsed as base-16.
    :param min_cbm_bits: minimal number of set bits, converted with ``int``.
    :raises InvalidAllocations: when ``mask`` is numerically bigger than
        ``cbm_mask``, when the set bits are not one contiguous run, or when
        fewer than ``min_cbm_bits`` bits are set.
    """
    requested = int(mask, 16)
    allowed = int(cbm_mask, 16)
    if requested > allowed:
        raise InvalidAllocations('Mask is bigger than allowed')

    bits = format(requested, 'b')

    # The binary text has no leading zeros, so every '0' in it is preceded by
    # some '1'; a '0' directly followed by '1' therefore means the run of ones
    # was interrupted and then resumed.
    if '01' in bits:
        raise InvalidAllocations(
            'Bit series of ones in mask '
            'must occur without a gap between them')

    ones_count = bits.count('1')
    required_bits = int(min_cbm_bits)
    if ones_count < required_bits:
        raise InvalidAllocations(
            str(ones_count) + " cbm bits. Requires minimum " + str(required_bits))
def validate(self):
    """Check that the stored value is non-empty and inside the allowed range.

    Only the first and the last element are compared against ``min_value``
    and ``max_value`` (presumably ``self.value`` is kept sorted - verify
    against the class that builds it).

    :raises InvalidAllocations: for an empty value or when a bound is exceeded.
    """
    if len(self.value) == 0:
        raise InvalidAllocations('{} is invalid argument!'.format(
            self._original_value))

    below_minimum = self.value[0] < self.min_value
    if below_minimum or self.value[-1] > self.max_value:
        raise InvalidAllocations('{} not in range <{};{}>'.format(
            self._original_value, self.min_value, self.max_value))
def check_mb_value(mb_value: str, mb_min_bandwidth):
    """Ensure ``mb_value`` is an integer not lower than ``mb_min_bandwidth``.

    :param mb_value: textual value converted with ``int``.
    :param mb_min_bandwidth: lowest accepted value.
    :raises InvalidAllocations: when the text is not an integer or the
        number is below the minimum.
    """
    try:
        requested_bandwidth = int(mb_value)
    except ValueError:
        raise InvalidAllocations("{} is not integer format".format(mb_value))

    too_small = requested_bandwidth < mb_min_bandwidth
    if too_small:
        raise InvalidAllocations(
            "mb allocation smaller than minimum value {}".format(
                str(mb_min_bandwidth)))
def _validate_domains(domains: List[str], platform_sockets): for domain in domains: try: domain_int = int(domain) if not (0 <= domain_int < platform_sockets): raise InvalidAllocations('invalid domain id - out of range' '(got=%r number_of_sockets=%i )' % (domain_int, platform_sockets)) except ValueError as e: raise InvalidAllocations('invalid domain id - non numeric' '(got=%r error=%s)' % (domain, e))
def cleanup_resctrl(root_rdt_l3: Optional[str], root_rdt_mb: Optional[str], reset_resctrl=False):
    """Reinitialize resctrl filesystem: by removing subfolders (both CTRL and MON groups)
    and setting default values for cache allocation and memory bandwidth (in root CTRL group).

    :param root_rdt_l3: L3 schemata row written to the root group; skipped when None.
    :param root_rdt_mb: MB schemata row written to the root group; skipped when None.
    :param reset_resctrl: when true, existing group directories are removed first.
    :raises InvalidAllocations: when writing a schemata row fails with OSError.
    """
    if reset_resctrl:
        log.info('RDT: removing all resctrl groups')

        def _remove_folders(initialdir, subfolder):
            """Remove group directories found directly in initialdir/subfolder."""
            parent_dir = os.path.join(initialdir, subfolder)
            for entry in os.listdir(parent_dir):
                # Build the path from the directory that was actually listed,
                # not from a hard-coded root, so initialdir is honoured.
                directory_path = os.path.join(parent_dir, entry)
                # Only examine folders at first level.
                if not os.path.isdir(directory_path):
                    continue
                # Examine tasks file.
                resctrl_tasks_path = os.path.join(directory_path, TASKS_FILENAME)
                if not os.path.exists(resctrl_tasks_path):
                    # Skip metadata folders e.g. info.
                    continue
                log.warning(
                    'Resctrl: Found ctrl or mon group at %r - recycle CLOS/RMID resource.',
                    directory_path)
                log.log(logger.TRACE, 'resctrl (mon_groups) - _cleanup: rmdir(%s)',
                        directory_path)
                os.rmdir(directory_path)

        # Remove all monitoring groups for both CLOS and RMID.
        _remove_folders(BASE_RESCTRL_PATH, MON_GROUPS)
        # Remove all resctrl groups.
        _remove_folders(BASE_RESCTRL_PATH, '')

    # Reinitialize default values for RDT (L3 first, then MB).
    for resource_name, root_value in (('L3', root_rdt_l3), ('MB', root_rdt_mb)):
        if root_value is None:
            continue
        log.info('RDT: reconfiguring root RDT group for %s resource with: %r',
                 resource_name, root_value)
        with open(os.path.join(BASE_RESCTRL_PATH, SCHEMATA), 'bw') as schemata:
            log.log(logger.TRACE, 'resctrl: write(%s): %r', schemata.name, root_value)
            try:
                schemata.write(bytes(root_value + '\n', encoding='utf-8'))
                # Flush explicitly so a failing write surfaces inside this try.
                schemata.flush()
            except OSError as e:
                raise InvalidAllocations('Cannot set %s allocation for default group: %s'
                                         % (resource_name, e)) from e
def validate(self):
    """Check that the list-format value decodes and fits the allowed range.

    An empty ``self.value`` is accepted and only reported at debug level.

    :raises InvalidAllocations: when ``decode_listformat`` rejects the value
        or when the smallest/largest decoded item is outside
        ``<min_value; max_value>``.
    """
    try:
        decoded = decode_listformat(self.value)
    except ValueError as e:
        raise InvalidAllocations('cannot decode list format %r: %s' % (self.value, e)) from e

    ordered = sorted(decoded)
    assert self.max_value is not None, 'should be initialized by subclass'

    if len(self.value) == 0:
        log.debug('found cpuset/memset set to empty string!')
        return

    if ordered[0] < self.min_value or ordered[-1] > self.max_value:
        raise InvalidAllocations(
            '{} not in range <{};{}>'.format(self.value, self.min_value, self.max_value))
def validate(self, rdt_allocation_value):
    """Register the resource group of the given value and enforce the limit.

    The group name is added to ``existing_groups`` before the check, so it
    stays registered even when the check fails.

    :raises InvalidAllocations: when the number of distinct groups is higher
        than ``closids_limit``.
    """
    known_groups = self.existing_groups
    known_groups.add(rdt_allocation_value.get_resgroup_name())

    if len(known_groups) <= self.closids_limit:
        return

    raise InvalidAllocations(
        'too many resource groups for available CLOSids')
def create(rdt_enabled: bool, tasks_allocations: TasksAllocations, containers, platform) \
        -> 'TasksAllocationsValues':
    """Box raw TasksAllocations into a validated TasksAllocationsValues object.

    For every task a TaskAllocationsValues is built from its raw allocations,
    the matching container, a registry of constructors and labels
    (container name, task id and the task's own labels), and is validated
    immediately.

    When RDT is enabled one RDTGroups object is shared by all created
    RDTAllocationValue instances.

    :raises InvalidAllocations: when an allocation refers to a task id that
        has no container (validation of a single task may raise it too).
    """
    registry = {
        AllocationType.QUOTA: QuotaAllocationValue,
        AllocationType.SHARES: SharesAllocationValue,
        AllocationType.CPUSET: CPUSetAllocationValue,
    }

    if rdt_enabled:
        # Shared object to optimize schemata write and detect CLOSids exhaustion.
        rdt_groups = RDTGroups(closids_limit=platform.rdt_information.num_closids)

        def rdt_allocation_value_constructor(rdt_allocation: RDTAllocation,
                                             container: ContainerInterface,
                                             common_labels: Dict[str, str]):
            return RDTAllocationValue(
                container.get_name(),
                rdt_allocation,
                container.get_resgroup(),
                container.get_pids,
                platform.sockets,
                platform.rdt_information,
                common_labels=common_labels,
                rdt_groups=rdt_groups,
            )

        registry[AllocationType.RDT] = rdt_allocation_value_constructor

    # Index containers and labels by task id for the lookups below.
    containers_by_task_id = {}
    labels_by_task_id = {}
    for task, task_container in containers.items():
        containers_by_task_id[task.task_id] = task_container
        labels_by_task_id[task.task_id] = task.labels

    boxed_allocations = {}
    for task_id, task_allocations in tasks_allocations.items():
        if task_id not in containers_by_task_id:
            raise InvalidAllocations('invalid task id %r' % task_id)
        container = containers_by_task_id[task_id]

        # Check consistency of container with RDT state.
        assert (container._platform.rdt_information is not None) == rdt_enabled

        extra_labels = {'container_name': container.get_name(), 'task': task_id}
        extra_labels.update(labels_by_task_id[task_id])

        boxed_value = TaskAllocationsValues.create(
            task_allocations, container, registry, extra_labels)
        boxed_value.validate()
        boxed_allocations[task_id] = boxed_value

    return TasksAllocationsValues(boxed_allocations)
def validate(self):
    """Validate the requested L3 and MB settings, then the resource group.

    Each setting is checked only when it is set (truthy) in
    ``rdt_allocation``; the shared ``rdt_groups`` object is consulted last.

    :raises InvalidAllocations: when a setting is requested but the
        corresponding RDT control is not enabled (the called validators
        may raise it as well).
    """
    if self.rdt_allocation.l3:
        rdt_info = self.rdt_information
        if not rdt_info.rdt_cache_control_enabled:
            raise InvalidAllocations(
                'Allocator requested RDT cache allocation but '
                'RDT cache control is not enabled!')
        validate_l3_string(
            self.rdt_allocation.l3,
            self.platform_sockets,
            rdt_info.cbm_mask,
            rdt_info.min_cbm_bits,
        )

    if self.rdt_allocation.mb:
        rdt_info = self.rdt_information
        if not rdt_info.rdt_mb_control_enabled:
            raise InvalidAllocations(
                'Allocator requested RDT MB allocation but '
                'RDT memory bandwidth is not enabled!')
        validate_mb_string(
            self.rdt_allocation.mb,
            self.platform_sockets,
            rdt_info.mb_min_bandwidth,
        )

    self.rdt_groups.validate(self)
def validate_mb_string(mb, platform_sockets, mb_min_bandwidth):
    """Validate an ``MB:`` schemata row.

    Checks the prefix, the domain ids (``_validate_domains``) and every
    domain value (``check_mb_value``).

    :raises InvalidAllocations: when the ``MB:`` prefix is missing
        (helpers may raise it as well).
    """
    assert mb_min_bandwidth is not None

    has_prefix = mb.startswith('MB:')
    if not has_prefix:
        raise InvalidAllocations(
            'mb resources setting should start with "MB:" prefix (got %r)' % mb)

    parsed_domains = _parse_schemata_file_row(mb)
    _validate_domains(parsed_domains, platform_sockets)

    for bandwidth in parsed_domains.values():
        check_mb_value(bandwidth, mb_min_bandwidth)
def _normalize_mb_value(mb_value: int, mb_min_bandwidth: int, mb_bandwidth_gran: int) -> int: """Ceil mb value to match granulation.""" if mb_value < mb_min_bandwidth: raise InvalidAllocations( "mb allocation smaller than minimum value {}".format( str(mb_min_bandwidth))) if mb_bandwidth_gran > 0: return math.ceil(mb_value / mb_bandwidth_gran) * mb_bandwidth_gran else: return mb_value
def validate_l3_string(l3, platform_sockets, rdt_cbm_mask, rdt_min_cbm_bits):
    """Validate an ``L3:`` schemata row.

    Checks the prefix, the domain ids (``_validate_domains``) and every
    domain mask (``check_cbm_mask``).

    :raises InvalidAllocations: when the ``L3:`` prefix is missing
        (helpers may raise it as well).
    """
    assert rdt_cbm_mask is not None
    assert rdt_min_cbm_bits is not None

    has_prefix = l3.startswith('L3:')
    if not has_prefix:
        raise InvalidAllocations(
            'l3 resources setting should start with "L3:" prefix (got %r)' % l3)

    parsed_domains = _parse_schemata_file_row(l3)
    _validate_domains(parsed_domains, platform_sockets)

    for domain_mask in parsed_domains.values():
        check_cbm_mask(domain_mask, rdt_cbm_mask, rdt_min_cbm_bits)
def validate_shares_allocation_for_kubernetes(tasks: List[Task], allocations: TasksAllocations):
    """Additional allocations validation step needed only for Kubernetes.

    Nothing is checked when ``have_tasks_qos_label(tasks)`` is false or when
    ``are_all_tasks_of_single_qos(tasks)`` is true.

    :raises InvalidAllocations: when tasks are of mixed QOS classes and any
        task allocation contains ``AllocationType.SHARES``.
    """
    # Ignore if not KubernetesNode.
    if not have_tasks_qos_label(tasks):
        return

    if are_all_tasks_of_single_qos(tasks):
        return

    shares_requested = any(
        AllocationType.SHARES in task_allocation
        for task_allocation in allocations.values()
    )
    if shares_requested:
        raise InvalidAllocations('not all tasks are of the same Kubernetes QOS class '
                                 'and at least one of the allocation contains '
                                 'cpu share. Mixing QoS classes and shares allocation '
                                 'is not supported.')
def create(task_allocations: TaskAllocations,
           container: Container,
           registry: RegistryType,
           common_labels: Dict[str, str]) -> 'TaskAllocationsValues':
    """Build TaskAllocationsValues from the raw allocations of a single task.

    Each allocation type is looked up in ``registry`` and the found
    constructor is called with the raw value, ``container`` and
    ``common_labels``.

    :raises InvalidAllocations: for an allocation type missing in ``registry``.
    """
    boxed_values = {}
    for allocation_type, raw_value in task_allocations.items():
        if allocation_type not in registry:
            raise InvalidAllocations('unsupported allocation type: %r' % allocation_type)
        build_value = registry[allocation_type]
        boxed_values[allocation_type] = build_value(raw_value, container, common_labels)

    return TaskAllocationsValues(boxed_values)
# Merge got_target_dict, got_changeset_dict = new_dict.calculate_changeset( current_dict) assert got_target_dict == convert_to_allocations_dict(expected_target) assert got_changeset_dict == convert_to_allocations_dict( expected_changeset) @pytest.mark.parametrize('allocation_dict, expected_error', [ (AllocationsDict({ 'bad_generic': Mock(spec=AllocationValue, validate=Mock( side_effect=InvalidAllocations('some generic error'))) }), 'some generic error'), (AllocationsDict({'x': BoxedNumericDummy(-1) }), 'does not belong to range'), (AllocationsDict({'x': AllocationsDict({'y': BoxedNumericDummy(-1)}) }), 'does not belong to range'), ]) def test_allocation_value_validate(allocation_dict, expected_error): with pytest.raises(InvalidAllocations, match=expected_error): allocation_dict.validate() @pytest.mark.parametrize('allocation_value, expected_metrics', [ (AllocationsDict({}), []), (BoxedNumericDummy(2), [allocation_metric('numeric', 2)]), (AllocationsDict({
def validate(self):
    """Run the parent validation, then refuse the allocation if swap is on.

    :raises InvalidAllocations: when ``self.platform.swap_enabled`` is true
        (the parent validation may raise as well).
    """
    super().validate()

    if not self.platform.swap_enabled:
        return

    raise InvalidAllocations(
        "Swap should be disabled due to possibility of OOM killer occurrence!")