def _build_allocations_from_rules(tasks_data: TasksData, rules):
    tasks_allocations = {}

    # Iterate over rules and apply them one by one.
    for rule_idx, rule in enumerate(rules):
        if 'allocations' not in rule:
            log.warning('StaticAllocator(%s): missing "allocations" - ignore!', rule_idx)
            continue

        log.debug('StaticAllocator(%s): processing %s rule.', rule_idx,
                  '(%s)' % rule['name'] if 'name' in rule else '')

        new_task_allocations = rule['allocations']
        if not new_task_allocations:
            log.log(TRACE, 'StaticAllocator(%s): allocations are empty - ignore!', rule_idx)
            continue

        # Convert if necessary.
        if 'rdt' in new_task_allocations and isinstance(new_task_allocations['rdt'], dict):
            new_task_allocations[AllocationType.RDT] = RDTAllocation(
                **new_task_allocations['rdt'])

        # Prepare match_task_ids filter:
        if 'task_id' in rule:
            # by task_id
            task_id = rule['task_id']
            match_task_ids = {task_id}
            log.log(TRACE, 'StaticAllocator(%s): match by task_id=%r',
                    rule_idx, rule['task_id'])

        # Find all tasks that match.
        elif 'labels' in rule:
            # by labels
            labels = rule['labels']
            match_task_ids = set()
            for task_id, data in tasks_data.items():
                matching_label_names = set(data.labels) & set(labels)
                for label_name in matching_label_names:
                    if re.match(str(labels[label_name]), data.labels[label_name]):
                        match_task_ids.add(task_id)
                        log.log(TRACE, 'StaticAllocator(%s): match task %r by label=%s',
                                rule_idx, task_id, label_name)
        else:
            # match everything
            log.log(TRACE, 'StaticAllocator(%s): match all tasks', rule_idx)
            match_task_ids = tasks_data.keys()

        # For matching tasks calculate and remember target_tasks_allocations.
        log.log(TRACE, 'StaticAllocator(%s): applying allocations for %i tasks',
                rule_idx, len(match_task_ids))

        rule_tasks_allocations = {}

        # Set rules for every matching task.
        for match_task_id in match_task_ids:
            rule_tasks_allocations[match_task_id] = new_task_allocations

        # Merge rules with previous rules.
        tasks_allocations = merge_rules(tasks_allocations, rule_tasks_allocations)

    return tasks_allocations
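
# Illustration only: a hypothetical `rules` list showing the three matching modes
# supported above (explicit task_id, regex match on labels, and catch-all).
# The rule names, allocation keys and values here are assumptions for the sketch,
# not taken from any real configuration; how overlapping keys combine across rules
# depends on merge_rules (not shown here).
_EXAMPLE_RULES = [
    {
        'name': 'pin-memcached',
        'task_id': 'memcached-0',                    # matches exactly one task by id
        'allocations': {'cpu_quota': 0.5},
    },
    {
        'name': 'cap-batch-jobs',
        'labels': {'workload_type': 'batch.*'},      # regex matched against task labels
        'allocations': {'rdt': {'l3': 'L3:0=0xf'}},  # dict converted to RDTAllocation(l3=...)
    },
    {
        'name': 'defaults',                          # no task_id/labels: applies to all tasks
        'allocations': {'cpu_shares': 1.0},
    },
]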
def detect(self, platform: Platform, tasks_data: TasksData):
    anomalies = []

    # Based on hostname, generate a phase skew for different hosts, to simulate
    # contention alerts firing from multiple sources at different times.
    if self.skew:
        phase_skew = sum(hashlib.sha256(socket.gethostname().encode('UTF-8')).digest())
    else:
        phase_skew = 0

    # Find out the current moment of the cycle.
    second_of_cycle = int(time.time() + phase_skew) % self.cycle_length

    # Make sure we have enough tasks (to simulate contention).
    if len(tasks_data) >= 10:

        resources = [
            ContendedResource.CPUS,
            ContendedResource.LLC,
            ContendedResource.MEMORY_BW,
        ]

        # Define phases of the simulation.
        if second_of_cycle < 10:
            # Single contention: one task pair on one resource (one metric).
            tasks_count = 1
            resources_count = 1
            metrics_count = 1
        elif second_of_cycle < 20:
            # One task pair contended on two resources (two metrics each).
            tasks_count = 1
            resources_count = 2
            metrics_count = 2
        elif second_of_cycle < 30:
            # One task pair contended on three resources (two metrics each).
            tasks_count = 1
            resources_count = 3
            metrics_count = 2
        elif second_of_cycle < 40:
            # Two task pairs, each contended on two resources (three metrics each).
            tasks_count = 2
            resources_count = 2
            metrics_count = 3
        elif second_of_cycle < 50:
            # Three task pairs, each contended on a single resource (one metric each).
            tasks_count = 3
            resources_count = 1
            metrics_count = 1
        else:
            # Contention-free period.
            tasks_count = 0
            resources_count = 0
            metrics_count = 0

        log.info('detector simulation: tasks=%d resources=%d metrics=%d!',
                 tasks_count, resources_count, metrics_count)

        # Make sure that we choose task pairs for generating faked contention.
        task_ids = sorted(tasks_data.keys())

        # Predefined pairs of contended and contending tasks.
        task_pairs = [
            (task_ids[0], task_ids[1:3]),   # 0 vs 1,2
            (task_ids[3], task_ids[4:5]),   # 3 vs 4
            (task_ids[6], task_ids[7:10]),  # 6 vs 7,8,9
        ]

        # Generate multiple contentions based on the scenario phase.
        for resource_idx in range(resources_count):
            for task_pair_idx in range(tasks_count):
                contended_task_id, contending_task_ids = task_pairs[task_pair_idx]
                resource = resources[resource_idx]
                metrics = [
                    Metric(name="cpu_threshold_%d" % i,
                           value=(i + 1) * 10,
                           type="gauge")
                    for i in range(metrics_count)
                ]
                anomalies.append(
                    ContentionAnomaly(
                        contended_task_id=contended_task_id,
                        contending_task_ids=contending_task_ids,
                        resource=resource,
                        metrics=metrics,
                    ))
    else:
        log.warning('not enough tasks %d to simulate contention!', len(tasks_data))

    debugging_metrics = [
        Metric(
            name='second_of_cycle',
            value=second_of_cycle,
            type="gauge",
        )
    ]

    return anomalies, debugging_metrics
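
# A minimal, standalone sketch of the phase computation used in detect() above, showing
# how the hostname-based skew spreads the simulated contention phases across hosts.
# The 60-second cycle length and the sample hostnames are assumptions for illustration
# (60 matches the 10-second phase boundaries above).
if __name__ == '__main__':
    import hashlib
    import time

    cycle_length = 60  # assumed cycle length

    for hostname in ('node-1', 'node-2', 'node-3'):
        # Same skew formula as in detect(): sum of the SHA-256 digest bytes of the hostname.
        phase_skew = sum(hashlib.sha256(hostname.encode('UTF-8')).digest())
        second_of_cycle = int(time.time() + phase_skew) % cycle_length
        print('%s: skew=%d second_of_cycle=%d' % (hostname, phase_skew, second_of_cycle))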