Exemple #1
0
 def _append_anomaly(self, anomalies, res, cid, contenders, owca_metrics):
     anomaly = ContentionAnomaly(
             resource=res,
             contended_task_id=cid,
             contending_task_ids=contenders,
             metrics=owca_metrics
         )
     anomalies.append(anomaly)
Exemple #2
0
def anomaly(contended_task_id: TaskId, contending_task_ids: List[TaskId],
            metrics: List[Metric] = None):
    """Helper method to create simple anomaly for single task.
    It is always about memory contention."""
    return ContentionAnomaly(
        contended_task_id=contended_task_id,
        contending_task_ids=contending_task_ids,
        resource=ContendedResource.MEMORY_BW,
        metrics=metrics or [],
    )
Exemple #3
0
    def detect(self, platform: Platform, tasks_measurements: TasksMeasurements,
               tasks_resources: TasksResources, tasks_labels: TasksLabels):

        anomalies = []

        # Based on hostname generate skew of phase for different hosts,
        # to simulate contention alerting firing from multiple sources at different time.
        if self.skew:
            phase_skew = sum(
                hashlib.sha256(socket.gethostname().encode('UTF-8')).digest())
        else:
            phase_skew = 0

        # Find out moment of cycle.
        second_of_cycle = int(time.time() + phase_skew) % self.cycle_length

        # Make sure we have enough tasks (to simulate contention).
        if len(tasks_measurements) >= 10:

            resources = [
                ContendedResource.CPUS,
                ContendedResource.LLC,
                ContendedResource.MEMORY_BW,
            ]

            # Define phases of simulation.
            if second_of_cycle < 10:
                # Single contention on one resource with single contender task.
                tasks_count = 1
                resources_count = 1
                metrics_count = 1
            elif second_of_cycle < 20:
                # Single contention on two resources with single contender task
                # (with two additional metrics)
                tasks_count = 1
                resources_count = 2
                metrics_count = 2
            elif second_of_cycle < 30:
                # Single contention on three resources with two contender tasks
                # (with two additional metrics each)
                tasks_count = 1
                resources_count = 3
                metrics_count = 2
            elif second_of_cycle < 40:
                # Two contentions each on two resources with two contender tasks
                # (with two additional metrics each)
                tasks_count = 2
                resources_count = 2
                metrics_count = 3
            elif second_of_cycle < 50:
                # Multiple (three) contentions each on single resource with single contender task
                # (with two additional metrics each)
                tasks_count = 3
                resources_count = 1
                metrics_count = 1
            else:
                # Contention free period.
                resources_count = 0
                tasks_count = 0
                metrics_count = 0

            log.info('detector simulation: tasks=%d resources=%d metrics=%d!',
                     tasks_count, resources_count, metrics_count)

            # Make sure that we choose tasks pairs for generating faked contention.
            task_ids = sorted(tasks_measurements.keys())

            # Predefined pairs of contended and contending tasks.
            task_pairs = [
                (task_ids[0], task_ids[1:3]),  # 0 vs 1,2
                (task_ids[3], task_ids[4:5]),  # 3 vs 4
                (task_ids[6], task_ids[7:10]),  # 6 vs 7,8,9
            ]

            # Generate multiple contention based on scenario phase.
            for resource_idx in range(resources_count):
                for task_pair_idx in range(tasks_count):

                    contended_task_id, contending_task_ids = task_pairs[
                        task_pair_idx]
                    resource = resources[resource_idx]
                    metrics = [
                        Metric(name="cpu_threshold_%d" % i,
                               value="%d" % (i + 1) * 10,
                               type="gauge") for i in range(metrics_count)
                    ]

                    anomalies.append(
                        ContentionAnomaly(
                            contended_task_id=contended_task_id,
                            contending_task_ids=contending_task_ids,
                            resource=resource,
                            metrics=metrics,
                        ))
        else:
            log.warning('not enough tasks %d to simulate contention!',
                        len(tasks_measurements))

        debugging_metrics = [
            Metric(
                name='second_of_cycle',
                value=second_of_cycle,
                type="gauge",
            )
        ]

        return anomalies, debugging_metrics