def _append_anomaly(self, anomalies, res, cid, contenders, owca_metrics):
    """Create a ContentionAnomaly from the arguments and add it to ``anomalies``.

    ``res`` is passed as the resource, ``cid`` as the contended task id,
    ``contenders`` as the contending task ids and ``owca_metrics`` as the
    metrics. ``anomalies`` is extended in place; nothing is returned.
    """
    entry = ContentionAnomaly(
        contended_task_id=cid,
        contending_task_ids=contenders,
        resource=res,
        metrics=owca_metrics,
    )
    anomalies.append(entry)
def anomaly(contended_task_id: TaskId, contending_task_ids: List[TaskId],
            metrics: List[Metric] = None):
    """Build a simple ContentionAnomaly for a single contended task.

    The contended resource is always ContendedResource.MEMORY_BW. When
    ``metrics`` is omitted (or otherwise falsy) a new empty list is used.
    """
    if not metrics:
        metrics = []
    return ContentionAnomaly(
        resource=ContendedResource.MEMORY_BW,
        contended_task_id=contended_task_id,
        contending_task_ids=contending_task_ids,
        metrics=metrics,
    )
def detect(self, platform: Platform, tasks_measurements: TasksMeasurements,
           tasks_resources: TasksResources, tasks_labels: TasksLabels):
    """Generate simulated contention anomalies based on the current second of a cycle.

    The wall-clock time (optionally shifted by a hostname-derived skew when
    ``self.skew`` is truthy) is reduced modulo ``self.cycle_length``; the
    resulting second selects one of several predefined phases that decide how
    many contended tasks, resources and metrics are reported.

    ``platform``, ``tasks_resources`` and ``tasks_labels`` are accepted but not
    used by this method. At least 10 entries in ``tasks_measurements`` are
    needed; otherwise a warning is logged and no anomalies are produced.

    Returns a tuple ``(anomalies, debugging_metrics)`` where ``anomalies`` is a
    list of ContentionAnomaly objects and ``debugging_metrics`` holds a single
    ``second_of_cycle`` gauge.
    """
    anomalies = []

    # Based on hostname generate skew of phase for different hosts,
    # to simulate contention alerting firing from multiple sources at different time.
    if self.skew:
        hostname_digest = hashlib.sha256(socket.gethostname().encode('UTF-8')).digest()
        phase_skew = sum(hostname_digest)
    else:
        phase_skew = 0

    # Find out moment of cycle.
    second_of_cycle = int(time.time() + phase_skew) % self.cycle_length

    # Make sure we have enough tasks (to simulate contention).
    if len(tasks_measurements) >= 10:
        resources = [
            ContendedResource.CPUS,
            ContendedResource.LLC,
            ContendedResource.MEMORY_BW,
        ]

        # Define phases of simulation.
        if second_of_cycle < 10:
            # One contended task, one resource, one metric.
            tasks_count = 1
            resources_count = 1
            metrics_count = 1
        elif second_of_cycle < 20:
            # One contended task, two resources, two metrics per anomaly.
            tasks_count = 1
            resources_count = 2
            metrics_count = 2
        elif second_of_cycle < 30:
            # One contended task, three resources, two metrics per anomaly.
            tasks_count = 1
            resources_count = 3
            metrics_count = 2
        elif second_of_cycle < 40:
            # Two contended tasks, two resources, three metrics per anomaly.
            tasks_count = 2
            resources_count = 2
            metrics_count = 3
        elif second_of_cycle < 50:
            # Three contended tasks, one resource, one metric per anomaly.
            tasks_count = 3
            resources_count = 1
            metrics_count = 1
        else:
            # Contention free period.
            resources_count = 0
            tasks_count = 0
            metrics_count = 0

        log.info('detector simulation: tasks=%d resources=%d metrics=%d!',
                 tasks_count, resources_count, metrics_count)

        # Sort task ids so the same tasks are picked for the faked contention.
        task_ids = sorted(tasks_measurements.keys())

        # Predefined pairs of contended and contending tasks.
        task_pairs = [
            (task_ids[0], task_ids[1:3]),  # 0 vs 1,2
            (task_ids[3], task_ids[4:5]),  # 3 vs 4
            (task_ids[6], task_ids[7:10]),  # 6 vs 7,8,9
        ]

        # Generate multiple contention based on scenario phase.
        for resource in resources[:resources_count]:
            for contended_task_id, contending_task_ids in task_pairs[:tasks_count]:
                # A fresh metrics list is built for every anomaly so that
                # anomalies never share a mutable list.
                # The value is the number (i + 1) * 10, i.e. 10, 20, 30, ...;
                # formatting with "%d" % (i + 1) * 10 would instead repeat the
                # formatted string ten times because % and * share precedence.
                metrics = [
                    Metric(name="cpu_threshold_%d" % i, value=(i + 1) * 10, type="gauge")
                    for i in range(metrics_count)
                ]
                anomalies.append(
                    ContentionAnomaly(
                        contended_task_id=contended_task_id,
                        contending_task_ids=contending_task_ids,
                        resource=resource,
                        metrics=metrics,
                    ))
    else:
        log.warning('not enough tasks %d to simulate contention!', len(tasks_measurements))

    # Expose the position within the cycle to help debugging the simulation.
    debugging_metrics = [
        Metric(
            name='second_of_cycle',
            value=second_of_cycle,
            type="gauge",
        )
    ]

    return anomalies, debugging_metrics