def test_performance_sanity(self) -> ResultBundle:
    """
    Assert that higher CPU frequency leads to more work done

    For every CPU, the sanity items are ordered by increasing frequency and
    the amount of work they report must be in non-decreasing order.

    :returns: A bundle that passes when no CPU violates that ordering. It
        carries the ``CPUs work`` metric (work per frequency, for each CPU)
        and the ``Failed CPUs`` metric (list of offending CPUs).
    """
    # NOTE(review): if ``groupby`` is itertools.groupby, this relies on
    # ``self.sanity_items`` being already ordered by CPU - confirm.
    cpu_items = {
        cpu: {
            # We expect only one item per frequency
            item.freq: item
            for item in freq_items
        }
        for cpu, freq_items in groupby(self.sanity_items, key=lambda item: item.cpu)
    }

    failed = []
    for cpu, freq_items in cpu_items.items():
        sorted_items = sorted(freq_items.values(), key=lambda item: item.freq)
        work = [item.work for item in sorted_items]
        # Work must not decrease when the frequency increases
        if work != sorted(work):
            failed.append(cpu)

    # The test passes only if no CPU was flagged
    res = ResultBundle.from_bool(not failed)

    work_metric = {
        cpu: {freq: item.work for freq, item in freq_items.items()}
        for cpu, freq_items in cpu_items.items()
    }
    res.add_metric('CPUs work', work_metric)
    res.add_metric('Failed CPUs', failed)

    return res
def test_task_placement(self, energy_est_threshold_pct=5,
        nrg_model: EnergyModel = None, capacity_margin_pct=20) -> ResultBundle:
    """
    Test that task placement was energy-efficient

    :param nrg_model: Allow using an alternate EnergyModel instead of
        ``self.nrg_model``
    :type nrg_model: EnergyModel

    :param energy_est_threshold_pct: Allowed margin for estimated vs
        optimal task placement energy cost
    :type energy_est_threshold_pct: int

    :param capacity_margin_pct: Forwarded to ``_get_expected_power_df()``
        when computing the expected (optimal) power.
    :type capacity_margin_pct: int

    Compute optimal energy consumption (energy-optimal task placement)
    and compare to energy consumption estimated from the trace.
    Check that the estimated energy does not exceed the optimal energy by
    more than ``energy_est_threshold_pct`` percents.

    :returns: A bundle carrying the ``estimated energy`` and
        ``energy threshold`` metrics, both in bogo-joules.
    """
    nrg_model = nrg_model or self.nrg_model

    exp_power = self._get_expected_power_df(nrg_model, capacity_margin_pct)
    est_power = self._get_estimated_power_df(nrg_model)

    # Sum the power over all columns, then integrate over time
    exp_energy = series_integrate(exp_power.sum(axis=1), method='rect')
    est_energy = series_integrate(est_power.sum(axis=1), method='rect')

    threshold = exp_energy * (1 + (energy_est_threshold_pct / 100))

    res = ResultBundle.from_bool(est_energy < threshold)
    res.add_metric("estimated energy", est_energy, 'bogo-joules')
    res.add_metric("energy threshold", threshold, 'bogo-joules')

    return res
def _test_cpus_busy(self, task_state_dfs, cpus, allowed_idle_time_s):
    """
    Check that :attr:`cpus` never stay idle for longer than
    :attr:`allowed_idle_time_s` during any window in which the tasks are
    running.

    :param task_state_dfs: Mapping of task to its state dataframe. Each row
        is treated as a window starting at the row index and lasting
        ``row.delta``.
    :param cpus: CPUs handed over to ``_max_idle_time()`` for each window.
    :param allowed_idle_time_s: Longest tolerated idle time. When ``None``,
        it defaults to 1 ms per CPU, rounded up to a whole number of jiffies.
    :returns: A bundle with one ``"<task> max idle"`` metric per task that
        has at least one window, failed if any of them exceeds the limit.
    """
    if allowed_idle_time_s is None:
        # Default budget: 1 ms * nr_cpus, rounded up to a jiffy multiple
        tick_s = 1 / self.plat_info['kernel']['config']['CONFIG_HZ']
        budget_s = 1e-3 * self.plat_info["cpus-count"]
        allowed_idle_time_s = ceil(budget_s / tick_s) * tick_s

    bundle = ResultBundle.from_bool(True)

    for task_name, states in task_state_dfs.items():
        # One candidate per task activation
        candidates = [
            self._max_idle_time(start, start + row.delta, cpus)
            for start, row in states.iterrows()
        ]

        if candidates:
            worst_time, worst_cpu = max(candidates)

            bundle.add_metric(
                "{} max idle".format(task_name),
                data={
                    "time": TestMetric(worst_time, "seconds"),
                    "cpu": TestMetric(worst_cpu),
                },
            )

            if worst_time > allowed_idle_time_s:
                bundle.result = Result.FAILED

    return bundle
def _test_cpus_busy(self, task_state_dfs, cpus, allowed_idle_time_s):
    """
    Check that :attr:`cpus` never stay idle for longer than
    :attr:`allowed_idle_time_s` during any window in which the tasks are
    running.

    :param task_state_dfs: Mapping of task to its state dataframe. Each row
        is treated as a window starting at the row index and lasting
        ``row.delta``.
    :param cpus: CPUs handed over to ``_max_idle_time()`` for each window.
    :param allowed_idle_time_s: Longest tolerated idle time. When ``None``,
        it defaults to 1 ms per CPU.
    :returns: A bundle with one ``"<task> max idle"`` metric per task that
        has at least one window, failed if any of them exceeds the limit.
    """
    if allowed_idle_time_s is None:
        nr_cpus = self.plat_info["cpus-count"]
        allowed_idle_time_s = 1e-3 * nr_cpus

    outcome = ResultBundle.from_bool(True)

    for task_name, states in task_state_dfs.items():
        # Look at every task activation and keep the worst one
        worst = max(
            (
                self._max_idle_time(start, start + row.delta, cpus)
                for start, row in states.iterrows()
            ),
            default=None,
        )
        # No activation at all for that task: nothing to report
        if worst is None:
            continue

        idle_time, idle_cpu = worst
        metric_name = "{} max idle".format(task_name)
        outcome.add_metric(metric_name, data={
            "time": TestMetric(idle_time, "seconds"),
            "cpu": TestMetric(idle_cpu),
        })

        if idle_time > allowed_idle_time_s:
            outcome.result = Result.FAILED

    return outcome
def test_slack(self, negative_slack_allowed_pct=15) -> ResultBundle:
    """
    Assert that the RTApp workload was given enough performance

    :param negative_slack_allowed_pct: Allowed percentage of RT-app task
        activations with negative slack.
    :type negative_slack_allowed_pct: int

    :class:`lisa.analysis.rta.PerfAnalysis` is loaded from the result
    directory, and for each task the share of activations reporting a
    negative "Slack" is computed. The test fails as soon as one task goes
    above ``negative_slack_allowed_pct`` percent.

    :returns: A bundle with one ``"<task> delayed activations"`` metric
        (in percent) per task.
    """
    perf = PerfAnalysis.from_dir(self.res_dir)

    # Percentage of activations with negative slack, for each task
    delayed_pct = {}
    over_budget = []
    for name in perf.tasks:
        slack_values = perf.get_df(name)["Slack"]
        negative_nr = len(slack_values[slack_values < 0])
        pct = negative_nr * 100 / len(slack_values)

        if pct > negative_slack_allowed_pct:
            over_budget.append(name)
        delayed_pct[name] = pct

    bundle = ResultBundle.from_bool(not over_budget)

    for name, pct in delayed_pct.items():
        bundle.add_metric("{} delayed activations".format(name), pct, '%')

    return bundle
def test_ramp_boost(self, nrg_threshold_pct=0.1, bad_samples_threshold_pct=0.1) -> ResultBundle:
    """
    Test that the energy boost feature is triggering as expected.

    :param nrg_threshold_pct: How far below the expected mean cost margin
        the measured mean cost margin is allowed to be.
    :param bad_samples_threshold_pct: Percentage of samples allowed to have
        a cost margin above the expected one.

    :returns: ``UNDECIDED`` when ``df_ramp_boost()`` is empty, otherwise a
        bundle with boost and slack metrics.
    """
    boost_df = self.df_ramp_boost()
    self._plot_test_boost(boost_df)

    # Without any cost_margin sample, boosting was not exhibited by that
    # test so nothing can be concluded
    if boost_df.empty:
        return ResultBundle(Result.UNDECIDED)

    expected_margin = boost_df['expected_cost_margin']
    actual_margin = boost_df['cost_margin']

    # The boost must always be positive (negative cannot really happen since
    # the kernel is using unsigned arithmetic, but still check in case there
    # are some dataframe handling issues)
    assert not (expected_margin < 0).any()
    assert not (actual_margin < 0).any()

    expected_boost_nrg = series_mean(expected_margin)
    actual_boost_nrg = series_mean(actual_margin)

    # The total amount of boost must be close to expectations, without
    # going above them
    floor = max(0, expected_boost_nrg - nrg_threshold_pct)
    ceiling = expected_boost_nrg
    passed_overhead = floor <= actual_boost_nrg <= ceiling

    # Shape of the signal: actual boost must be lower or equal than the
    # expected one.
    good_shape_nr = (actual_margin <= expected_margin).sum()

    samples_nr = len(boost_df)
    bad_shape_nr = samples_nr - good_shape_nr
    bad_shape_pct = bad_shape_nr / samples_nr * 100

    # Tolerate a few bad samples that added too much boost
    passed_shape = bad_shape_pct < bad_samples_threshold_pct

    bundle = ResultBundle.from_bool(passed_overhead and passed_shape)
    bundle.add_metric('expected boost energy overhead', expected_boost_nrg, '%')
    bundle.add_metric('boost energy overhead', actual_boost_nrg, '%')
    bundle.add_metric('bad boost samples', bad_shape_pct, '%')

    # Add some slack plots ...
    rta = self.trace.analysis.rta
    for task in self.rtapp_tasks:
        rta.plot_slack_histogram(task)
        rta.plot_perf_index_histogram(task)
        rta.plot_latency(task)

    # ... and slack metrics
    bundle.add_metric('avg slack', self.get_avg_slack(), 'us')
    bundle.add_metric('avg negative slack', self.get_avg_slack(only_negative=True), 'us')

    return bundle
def test_slack(self, negative_slack_allowed_pct=15) -> ResultBundle:
    """
    Assert that the RTApp workload was given enough performance

    :param negative_slack_allowed_pct: Allowed percentage of RT-app task
        activations with negative slack.
    :type negative_slack_allowed_pct: int

    The rt-app stats of every task listed in ``self.rtapp_tasks_map`` are
    inspected to find the activations the workload could not complete in
    time (i.e. its reported "slack" was negative). The test fails as soon
    as one task exceeds ``negative_slack_allowed_pct`` percent of such
    activations.

    :returns: A bundle with one ``"<task> delayed activations"`` metric
        (in percent) per task.
    """
    self._check_valid_placement()

    rta = self.trace.ana.rta
    tasks = list(chain.from_iterable(self.rtapp_tasks_map.values()))

    # Percentage of activations with negative slack, for each task
    delayed_pct = {}
    over_budget = []
    for name in tasks:
        slack_values = rta.df_rtapp_stats(name)["slack"]
        negative_nr = len(slack_values[slack_values < 0])
        pct = negative_nr * 100 / len(slack_values)

        if pct > negative_slack_allowed_pct:
            over_budget.append(name)
        delayed_pct[name] = pct

    bundle = ResultBundle.from_bool(not over_budget)

    for name, pct in delayed_pct.items():
        bundle.add_metric(f"{name} delayed activations", pct, '%')

    return bundle
def test_stune_task_placement(self, bad_cpu_margin_pct=10) -> ResultBundle:
    """
    Test that the task placement satisfied the boost requirement

    :param bad_cpu_margin_pct: Upper bound (exclusive) on the percentage of
        runtime spent on CPUs that cannot serve the boost.

    Check that top-app tasks spend no more than ``bad_cpu_margin_pct`` of
    their time on CPUs that don't have enough capacity to serve their
    boost.

    :returns: A bundle with the ``time spent on inappropriate CPUs`` and
        ``boost`` metrics, both in percent.
    """
    assert len(self.rtapp_tasks) == 1
    residency_df = self.trace.analysis.tasks.df_task_total_residency(
        self.rtapp_tasks[0]
    )

    boost = self.boost
    capacities = self.plat_info['cpu-capacities']

    # CPUs whose capacity, expressed as a percentage of 1024, is below the
    # boost
    weak_cpus = [
        cpu for cpu in capacities
        if (capacities[cpu] / 10.24) < boost
    ]

    # Accumulate the runtime on weak CPUs and on all CPUs
    weak_runtime = 0
    overall_runtime = 0
    for cpu in capacities:
        runtime = residency_df['runtime'][cpu]
        overall_runtime += runtime
        if cpu in weak_cpus:
            weak_runtime += runtime

    weak_pct = weak_runtime * 100 / overall_runtime

    bundle = ResultBundle.from_bool(weak_pct < bad_cpu_margin_pct)
    bundle.add_metric("time spent on inappropriate CPUs", weak_pct, '%')
    bundle.add_metric("boost", boost, '%')

    return bundle
def _test_behaviour(self, signal_name, error_margin_pct):
    """
    Check that the settled mean of a PELT signal matches the task duty cycle.

    :param signal_name: Name of the signal column in the dataframe returned
        by ``get_simulated_pelt()``.
    :param error_margin_pct: Upper bound (exclusive) on the error between
        the expected and the settled mean, as a percentage of
        ``UTIL_SCALE``.

    :returns: A bundle with the ``expected mean``, ``settled mean`` and
        ``settled mean error`` metrics, further processed by
        ``_add_cpu_metric()``.
    """
    task = self.task_name
    phase = self.wlgen_task.phases[0]
    df = self.get_simulated_pelt(task, signal_name)

    # The first phase is expected to be pinned on exactly one CPU
    cpus = sorted(phase['cpus'])
    assert len(cpus) == 1

    expected_duty_cycle_pct = phase['wload'].unscaled_duty_cycle_pct(self.plat_info)
    expected_final_util = expected_duty_cycle_pct / 100 * UTIL_SCALE

    # Skip the ramp-up: only look at the signal once it has settled,
    # counting from the first sample of the dataframe
    settling_time = pelt_settling_time(10, init=0, final=expected_final_util)
    settling_time += df.index[0]
    df = df[settling_time:]

    # NOTE(review): kernel_util_mean() is used rather than a plain mean,
    # presumably to avoid the bias introduced by the fact that the util
    # signal stays high while the task sleeps - confirm against its
    # definition.
    settled_signal_mean = kernel_util_mean(df[signal_name], plat_info=self.plat_info)
    expected_signal_mean = expected_final_util

    signal_mean_error_pct = abs(expected_signal_mean - settled_signal_mean) / UTIL_SCALE * 100
    res = ResultBundle.from_bool(signal_mean_error_pct < error_margin_pct)

    res.add_metric('expected mean', expected_signal_mean)
    res.add_metric('settled mean', settled_signal_mean)
    res.add_metric('settled mean error', signal_mean_error_pct, '%')

    self._plot_pelt(task, signal_name, df['simulated'], 'behaviour')

    res = self._add_cpu_metric(res)
    return res
def _test_correctness(self, signal_name, mean_error_margin_pct, max_error_margin_pct):
    """
    Compare a PELT signal from the trace against its simulated counterpart.

    :param signal_name: Name of the signal column in the dataframe returned
        by ``get_simulated_pelt()``.
    :param mean_error_margin_pct: Highest tolerated mean absolute error, as
        a percentage of ``UTIL_SCALE``.
    :param max_error_margin_pct: Highest tolerated maximum absolute error,
        as a percentage of ``UTIL_SCALE``.

    :returns: A bundle with mean/max metrics for the actual signal, the
        simulated one and their error, further processed by
        ``_add_cpu_metric()``.
    """
    task = self.task_name
    pelt_df = self.get_simulated_pelt(task, signal_name)

    actual = pelt_df[signal_name]
    simulated = pelt_df['simulated']
    error_magnitude = pelt_df['error'].abs()

    # Errors expressed as a percentage of the full signal scale
    mean_error_pct = series_mean(error_magnitude) / UTIL_SCALE * 100
    max_error_pct = error_magnitude.max() / UTIL_SCALE * 100

    mean_ok = mean_error_pct <= mean_error_margin_pct
    max_ok = max_error_pct <= max_error_margin_pct

    bundle = ResultBundle.from_bool(mean_ok and max_ok)

    bundle.add_metric('actual mean', series_mean(actual))
    bundle.add_metric('simulated mean', series_mean(simulated))
    bundle.add_metric('mean error', mean_error_pct, '%')

    bundle.add_metric('actual max', actual.max())
    bundle.add_metric('simulated max', simulated.max())
    bundle.add_metric('max error', max_error_pct, '%')

    self._plot_pelt(task, signal_name, simulated, 'correctness')

    return self._add_cpu_metric(bundle)
def _test_signal(self, signal_name, allowed_error_pct):
    """
    Run ``_test_task_signal()`` on every task of the rt-app profile.

    :param signal_name: Name of the signal, forwarded to
        ``_test_task_signal()``.
    :param allowed_error_pct: Error margin, forwarded to
        ``_test_task_signal()``.

    :returns: A bundle that passes only if every task passed. It carries
        the ``cpu`` metric (CPU, suffixed with ``@<freq>`` when a frequency
        is set), the ``Expected signals`` metric and the ``Trace signals``
        metric.
    """
    capacity = self.plat_info['cpu-capacities'][self.cpu]

    # Scale the capacity linearly according to the frequency. ``self.freq``
    # may be None (see the "cpu" metric below), in which case the capacity
    # is used unscaled instead of failing on the division.
    if self.freq is not None:
        max_freq = max(self.plat_info['freqs'][self.cpu])
        capacity *= (self.freq / max_freq)

    passed = True
    expected_data = {}
    trace_data = {}

    for name in self.rtapp_profile:
        ok, exp_util, signal_mean = self._test_task_signal(
            signal_name, allowed_error_pct, self.trace, self.cpu, name, capacity)

        if not ok:
            passed = False

        expected_data[name] = TestMetric(exp_util)
        trace_data[name] = TestMetric(signal_mean)

    freq_str = '@{}'.format(self.freq) if self.freq is not None else ''

    bundle = ResultBundle.from_bool(passed)
    bundle.add_metric("cpu", '{}{}'.format(self.cpu, freq_str))
    bundle.add_metric("Expected signals", expected_data)
    bundle.add_metric("Trace signals", trace_data)
    return bundle
def test_cpus_alive(self) -> ResultBundle:
    """
    Test that all CPUs came back online after the hotplug operations

    :returns: A bundle that passes when ``self.hotpluggable_cpus`` equals
        ``self.live_cpus``, with both values attached as metrics.
    """
    all_back = self.hotpluggable_cpus == self.live_cpus

    bundle = ResultBundle.from_bool(all_back)
    bundle.add_metric("hotpluggable CPUs", self.hotpluggable_cpus)
    bundle.add_metric("Online CPUs", self.live_cpus)
    return bundle
def test_output(self):
    """
    Check that at least one line of ``self.shell_output`` contains ``42``.

    :returns: A passing bundle if such a line exists, a failing one
        otherwise.
    """
    # Stops at the first matching line
    found = any('42' in line for line in self.shell_output)
    return ResultBundle.from_bool(found)
def test_cpus_alive(self) -> ResultBundle:
    """
    Test that all CPUs came back online after the hotplug operations

    :returns: A bundle that passes when ``self.hotpluggable_cpus`` equals
        ``self.live_cpus``. The ``dead CPUs`` metric lists the hotpluggable
        CPUs missing from the live ones, and ``number of dead CPUs`` counts
        them.
    """
    bundle = ResultBundle.from_bool(self.hotpluggable_cpus == self.live_cpus)

    # Hotpluggable CPUs that are not alive anymore
    missing = set(self.hotpluggable_cpus).difference(self.live_cpus)
    dead_cpus = sorted(missing)

    bundle.add_metric("dead CPUs", dead_cpus)
    bundle.add_metric("number of dead CPUs", len(dead_cpus))
    return bundle
def test_util_task_migration(self, allowed_error_pct=3) -> ResultBundle:
    """
    Test that a migrated task properly propagates its utilization at the CPU level

    :param allowed_error_pct: How much the trace averages can stray from the
        expected values
    :type allowed_error_pct: float

    :returns: A bundle with the ``Expected utilization``,
        ``Trace utilization`` and ``Utilization deltas`` metrics, each
        organized per CPU then per phase.
    """
    all_expected = self.get_expected_cpu_util()
    all_traced = self.get_trace_cpu_util()

    mismatch_found = False
    expected_metrics = {}
    trace_metrics = {}
    deltas = {}

    for cpu in self.cpus:
        cpu_expected = all_expected[cpu]
        cpu_traced = all_traced[cpu]

        cpu_key = f"cpu{cpu}"
        expected_metric = TestMetric({})
        trace_metric = TestMetric({})
        delta_metric = TestMetric({})
        expected_metrics[cpu_key] = expected_metric
        trace_metrics[cpu_key] = trace_metric
        deltas[cpu_key] = delta_metric

        # Only the phases known on both sides can be compared
        common_phases = sorted(cpu_traced.keys() & cpu_expected.keys())
        for phase in common_phases:
            # TODO: remove that once we have named phases to skip the buffer phase
            if phase == 0:
                continue

            phase_expected = cpu_expected[phase]
            phase_traced = cpu_traced[phase]
            is_equal, delta = self.is_almost_equal(
                phase_expected,
                phase_traced,
                allowed_error_pct,
            )
            if not is_equal:
                mismatch_found = True

            # Verbose metric collection
            phase_key = f"phase{phase}"
            expected_metric.data[phase_key] = TestMetric(phase_expected)
            trace_metric.data[phase_key] = TestMetric(phase_traced)
            delta_metric.data[phase_key] = TestMetric(delta, "%")

    bundle = ResultBundle.from_bool(not mismatch_found)
    bundle.add_metric("Expected utilization", expected_metrics)
    bundle.add_metric("Trace utilization", trace_metrics)
    bundle.add_metric("Utilization deltas", deltas)

    self._plot_util()

    return bundle
def test_placement(self) -> ResultBundle:
    """
    For each phase, checks if the task placement is compatible with
    UtilClamp requirements. This is done by comparing the maximum capacity
    of the CPU on which the task has been placed, with the UtilClamp value.

    :returns: A bundle that passes when, in every phase, all the CPUs the
        task ran on are fitting ones. The ``Phases`` metric details each
        phase.
    """
    phase_metrics = {}
    failed_activations = []
    margin = self.CAPACITY_MARGIN
    max_capacities = self.plat_info['cpu-capacities']['rtapp']

    def parse_phase(df, phase):
        clamp = phase['uclamp_val']
        is_active = df['active'] == 1
        activations_nr = df['active'][is_active].count()

        used_cpus = set(df.cpu.dropna().unique())

        # A CPU fits if it has the maximum capacity, or if its capacity
        # scaled by the margin is above the clamp value
        fitting_cpus = set()
        for cpu, cap in max_capacities.items():
            if (cap == PELT_SCALE) or (cap * margin) > clamp:
                fitting_cpus.add(cpu)

        # Activations that happened on a CPU that does not fit
        unfit_cpus = used_cpus - fitting_cpus
        on_unfit_cpu = df['cpu'].isin(unfit_cpus)
        bad_activations = df[(df['active'] == 1) & on_unfit_cpu].index.tolist()
        bad_nr = len(bad_activations)
        failed_activations.extend(bad_activations)

        phase_metrics[f"Phase-{phase['phase']}"] = {
            'uclamp-min': TestMetric(clamp),
            'cpu-placements': TestMetric(used_cpus),
            'expected-cpus': TestMetric(fitting_cpus),
            'bad-activations': TestMetric(
                bad_nr * 100 / activations_nr, "%"),
        }

        return used_cpus.issubset(fitting_cpus)

    phases_ok = self._for_each_phase(parse_phase).all()

    bundle = ResultBundle.from_bool(phases_ok)
    bundle.add_metric('Phases', phase_metrics)

    self._plot_phases('test_placement', failed_activations)

    return bundle
def test_util_task_migration(self, allowed_error_pct=5) -> ResultBundle:
    """
    Test that a migrated task properly propagates its utilization at the CPU level

    :param allowed_error_pct: How much the trace averages can stray from the
        expected values
    :type allowed_error_pct: float

    :returns: A bundle with the ``Expected utilization``,
        ``Trace utilization`` and ``Utilization deltas`` metrics, each
        organized per CPU then per phase. Deltas are relative to the
        expected value, in percent.
    """
    expected_cpu_util = self.get_expected_cpu_util()
    trace_cpu_util = self.get_trace_cpu_util()

    passed = True

    expected_metrics = {}
    trace_metrics = {}
    deltas = {}

    for cpu in self.cpus:
        cpu_str = "cpu{}".format(cpu)

        expected_metrics[cpu_str] = TestMetric({})
        trace_metrics[cpu_str] = TestMetric({})
        deltas[cpu_str] = TestMetric({})

        for phase in range(self.nr_phases):
            expected = expected_cpu_util[cpu][phase]
            trace = trace_cpu_util[cpu][phase]

            if not self.is_almost_equal(trace, expected, allowed_error_pct):
                passed = False

            # A relative delta is undefined for a null expected value (e.g.
            # a CPU expected to be idle in that phase), so avoid dividing
            # by zero: report no delta when the trace agrees, and an
            # infinite one otherwise.
            if expected:
                delta = 100 * (trace - expected) / expected
            elif trace == expected:
                delta = 0.0
            else:
                delta = float('inf') if trace > expected else float('-inf')

            # Just some verbose metric collection...
            phase_str = "phase{}".format(phase)

            expected_metrics[cpu_str].data[phase_str] = TestMetric(expected)
            trace_metrics[cpu_str].data[phase_str] = TestMetric(trace)
            deltas[cpu_str].data[phase_str] = TestMetric(delta, "%")

    res = ResultBundle.from_bool(passed)
    res.add_metric("Expected utilization", expected_metrics)
    res.add_metric("Trace utilization", trace_metrics)
    res.add_metric("Utilization deltas", deltas)

    return res
def test_performance_sanity(self) -> ResultBundle:
    """
    Assert that higher CPU frequency leads to more work done

    :returns: A bundle with one ``"CPU<n> work"`` metric per CPU, failed if
        for any CPU the work ordered by increasing frequency is not in
        non-decreasing order.
    """
    bundle = ResultBundle.from_bool(True)

    for cpu, work_per_freq in self.cpu_work.items():
        # Amounts of work, ordered by increasing frequency
        ordered_work = [
            work_per_freq[freq]
            for freq in sorted(work_per_freq.keys())
        ]

        if ordered_work != sorted(ordered_work):
            bundle.result = Result.FAILED

        bundle.add_metric("CPU{} work".format(cpu), work_per_freq)

    return bundle
def test_capacity_sanity(self) -> ResultBundle:
    """
    Assert that higher CPU capacity means more work done

    :returns: A bundle that passes when the work ordered by increasing
        capacity is in non-decreasing order. The
        ``Capacity to performance`` metric maps each capacity to its work.
    """
    # Amounts of work, ordered by increasing capacity
    ordered_work = [
        self.capacity_work[capacity]
        for capacity in sorted(self.capacity_work.keys())
    ]

    # The list of work units must be monotonically increasing
    bundle = ResultBundle.from_bool(ordered_work == sorted(ordered_work))

    capa_score = {
        capacity: TestMetric(amount)
        for capacity, amount in self.capacity_work.items()
    }
    bundle.add_metric("Capacity to performance", capa_score)

    return bundle
def test_task_remains(self) -> ResultBundle:
    """
    Test that task remains on the same core

    :returns: A bundle that fails if the dataframe returned by
        ``_get_task_cpu_df()`` has more than one row for any task. The
        ``Migrations`` metric gives that row count per task.
    """
    migrated = False
    migration_metrics = {}

    for task_id in self.rtapp_task_ids:
        rows_nr = len(self._get_task_cpu_df(task_id).index)
        migration_metrics[task_id] = TestMetric(rows_nr)

        # Ideally, task with 50% utilization should stay on the same core
        migrated = migrated or rows_nr > 1

    bundle = ResultBundle.from_bool(not migrated)
    bundle.add_metric("Migrations", migration_metrics)

    return bundle
def test_preempt_time(self, allowed_preempt_pct=1) -> ResultBundle:
    """
    Test that tasks are not being preempted too much

    :param allowed_preempt_pct: Highest tolerated preemption time for a
        task, as a percentage of ``self.duration``.

    :returns: A bundle with one ``"<task> preemption"`` metric per task,
        holding the preemption ``ratio`` (percent) and ``time`` (seconds).
    """
    switch_df = self.trace.df_events('sched_switch')

    tasks_analysis = self.trace.analysis.tasks
    states_by_task = {}
    for task in self.rtapp_tasks:
        states_by_task[task] = tasks_analysis.df_task_states(task)

    bundle = ResultBundle.from_bool(True)

    for task, states in states_by_task.items():
        # sched_switch events where the misfit task is replaced by another
        # misfit task
        switched_out = switch_df.prev_comm == task
        replaced_by_peer = switch_df.next_comm.str.startswith(self.task_prefix)
        preempt_switches = switch_df[switched_out & replaced_by_peer]

        at_preempt_switch = states.index.isin(preempt_switches.index)
        # Ensure this is a preemption and not just the task ending
        in_expected_state = states.curr_state == TaskState.TASK_INTERRUPTIBLE
        preempted = self._trim_state_df(
            states[at_preempt_switch & in_expected_state]
        )

        preempt_time = preempted.delta.sum()
        preempt_pct = (preempt_time / self.duration) * 100

        bundle.add_metric(
            "{} preemption".format(task),
            {
                "ratio": TestMetric(preempt_pct, "%"),
                "time": TestMetric(preempt_time, "seconds"),
            },
        )

        if preempt_pct > allowed_preempt_pct:
            bundle.result = Result.FAILED

    return bundle
def test_stune_frequency(self, freq_margin_pct=10) -> ResultBundle:
    """
    Test that frequency selection followed the boost

    :param freq_margin_pct: Allowed margin between estimated and measured
        average frequencies
    :type freq_margin_pct: int

    Compute the expected frequency given the boost level and compare to the
    real average frequency from the trace.
    Check that the difference between expected and measured frequencies is
    no larger than ``freq_margin_pct``.

    :returns: A bundle with the ``target freq`` and ``average freq``
        metrics (kHz) and the ``boost`` metric (percent).
    """
    kernel_version = self.plat_info['kernel']['version']
    if kernel_version.parts[:2] < (4, 14):
        self.get_logger().warning(
            'This test requires the RT boost hold, but it may be disabled in {}'
            .format(kernel_version))

    # First CPU of the last capacity class
    cpu = self.plat_info['capacity-classes'][-1][0]
    freqs = self.plat_info['freqs'][cpu]
    max_freq = max(freqs)

    # Estimate the target frequency, including sugov's margin, capped at the
    # maximum frequency
    boost = self.boost
    raw_target_freq = min(max_freq, max_freq * boost / 80)

    # Round up into a real OPP: the lowest frequency that is high enough.
    # Using min() rather than the first match keeps this correct even if
    # ``freqs`` is not sorted in ascending order. The candidates are never
    # empty since the raw target is capped at ``max_freq``.
    target_freq = min(f for f in freqs if f >= raw_target_freq)

    # Get the real average frequency
    avg_freq = self.trace.analysis.frequency.get_average_cpu_frequency(cpu)

    distance = abs(target_freq - avg_freq) * 100 / target_freq
    res = ResultBundle.from_bool(distance < freq_margin_pct)
    res.add_metric("target freq", target_freq, 'kHz')
    res.add_metric("average freq", avg_freq, 'kHz')
    res.add_metric("boost", boost, '%')

    return res
def _test_signal(self, signal_name, allowed_error_pct):
    """
    Run ``_test_task_signal()`` on every rt-app task.

    :param signal_name: Name of the signal, forwarded to
        ``_test_task_signal()``.
    :param allowed_error_pct: Error margin, forwarded to
        ``_test_task_signal()``.

    :returns: A bundle that passes only if every task passed. It carries
        the ``cpu`` metric (CPU, suffixed with ``@<freq>`` when a frequency
        is set), the ``Expected signals`` metric and the ``Trace signals``
        metric.
    """
    capacity = self._get_freq_capa(self.cpu, self.freq, self.plat_info)

    failing_tasks = []
    expected_data = {}
    trace_data = {}

    for task_name in self.rtapp_tasks:
        ok, exp_util, signal_mean = self._test_task_signal(
            signal_name,
            allowed_error_pct,
            self.trace,
            self.cpu,
            task_name,
            capacity,
        )
        if not ok:
            failing_tasks.append(task_name)

        expected_data[task_name] = TestMetric(exp_util)
        trace_data[task_name] = TestMetric(signal_mean)

    # Only mention the frequency when one is set
    if self.freq is not None:
        freq_suffix = '@{}'.format(self.freq)
    else:
        freq_suffix = ''
    cpu_label = '{}{}'.format(self.cpu, freq_suffix)

    result = ResultBundle.from_bool(not failing_tasks)
    result.add_metric("cpu", cpu_label)
    result.add_metric("Expected signals", expected_data)
    result.add_metric("Trace signals", trace_data)
    return result
def _test_range(self, signal_name, allowed_error_pct):
    """
    Check that the min and max of a settled signal match the simulated
    stable range.

    :param signal_name: Name of the signal to check.
    :param allowed_error_pct: Error margin, forwarded to
        ``is_almost_equal()``.

    :returns: A bundle with the ``Trace signal`` and ``Expected signal``
        metrics, each holding a ``min`` and a ``max`` value.
    """
    bundle = ResultBundle.from_bool(True)

    profile_task = self.rtapp_profile[self.task_name]
    cpu = profile_task.phases[0].cpus[0]

    # Note: This test-case is only valid if executed at capacity == 1024.
    # The below assertion is insufficient as it only checks the CPU can
    # potentially reach a capacity of 1024.
    assert self.plat_info["cpu-capacities"][cpu] == UTIL_SCALE

    peltsim, pelt_task, _ = self.get_simulated_pelt(cpu, signal_name)
    trace_df = self.get_task_sched_signal(cpu, signal_name)

    stable_range = peltsim.stableRange(pelt_task)

    # Statistics of the signal over a period of time where it is supposed
    # to be stable
    settled_signal = trace_df[UTIL_AVG_CONVERGENCE_TIME_S:][signal_name]
    trace_stats = settled_signal.describe()

    expected_data = {}
    trace_data = {}

    for stat in ('min', 'max'):
        expected_value = getattr(stable_range, '{}_value'.format(stat))
        trace_value = trace_stats[stat]

        if not self.is_almost_equal(expected_value, trace_value, allowed_error_pct):
            bundle.result = Result.FAILED

        trace_data[stat] = TestMetric(trace_stats[stat])
        expected_data[stat] = TestMetric(expected_value)

    bundle.add_metric("Trace signal", trace_data)
    bundle.add_metric("Expected signal", expected_data)

    return bundle
def test_activations(self) -> ResultBundle:
    """
    Test signals are properly "aggregated" at enqueue/dequeue time.

    On fast-ramp systems, `util_est_enqueued` is expected to be always
    smaller than `util_est_ewma`.

    On non fast-ramp systems, the `util_est_enqueued` is expected to be
    smaller than `util_est_ewma` in ramp-down phases, or bigger in ramp-up
    phases.

    Those conditions are checked on a single execution of a task which has
    three main behaviours:

        * STABLE: periodic big task running for a relatively long period to
          ensure `util_avg` saturation.
        * DOWN: periodic ramp-down task, to slowly decay `util_avg`
        * UP: periodic ramp-up task, to slowly increase `util_avg`

    :returns: A bundle that passes when no activation is faulty. The
        ``signals`` and ``failure reasons`` metrics are both keyed by the
        1-based index of the activation, so that a failure reason can be
        matched with the signals it refers to.
    """
    failure_reasons = {}
    metrics = {}

    # We have only two task: the main 'rt-app' task and our 'test_task'
    test_task = self.trace.analysis.rta.rtapp_tasks[-1]

    # Get list of task's activations
    df = self.trace.analysis.tasks.df_task_states(test_task)
    activations = df[
        (df.curr_state == TaskState.TASK_WAKING) &
        (df.next_state == TaskState.TASK_ACTIVE)
    ].index

    # Check task signals at each activation
    df = self.trace.df_events('sched_util_est_task')
    df = df_filter_task_ids(df, [test_task])

    # Define a time interval to correlate relative trace events.
    def restrict(df, time, delta=1e-3):
        return df[time - delta:time + delta]

    failures = []
    for idx, activation in enumerate(activations, start=1):
        # Last signal update in the window around the activation
        avg, enq, ewma = restrict(df, activation)[[
            'util_avg', 'util_est_enqueued', 'util_est_ewma'
        ]].iloc[-1]

        metrics[idx] = ActivationSignals(activation, avg, enq, ewma)

        reason = None

        # UtilEst is not updated when within 1% of previous activation
        if 1.01 * enq < avg:
            reason = 'enqueued({}) smaller than util_avg({}) @ {}'\
                .format(enq, avg, activation)

        # Running on FastRamp kernels:
        elif self.fast_ramp:
            # STABLE, DOWN and UP:
            if enq > ewma:
                reason = 'enqueued({}) bigger than ewma({}) @ {}'\
                    .format(enq, ewma, activation)

        # Running on (legacy) non FastRamp kernels:
        else:
            phase = self.trace.analysis.rta.task_phase_at(
                test_task, activation)

            # STABLE: ewma ramping up
            if phase.id == 0 and enq < ewma:
                reason = 'enqueued({}) smaller than ewma({}) @ {}'\
                    .format(enq, ewma, activation)

            # DOWN: ewma ramping down
            elif 0 < phase.id < 5 and enq > ewma:
                reason = 'enqueued({}) bigger than ewma({}) @ {}'\
                    .format(enq, ewma, activation)

            # UP: ewma ramping up
            elif phase.id > 4 and enq < ewma:
                reason = 'enqueued({}) smaller than ewma({}) @ {}'\
                    .format(enq, ewma, activation)

        if reason is not None:
            # Same key as in ``metrics``
            failure_reasons[idx] = reason
            failures.append(activation)

    self._plot_signals(test_task, 'activations', failures)

    bundle = ResultBundle.from_bool(not failure_reasons)
    bundle.add_metric("signals", metrics)
    bundle.add_metric("failure reasons", failure_reasons)
    return bundle
def test_areas(self) -> ResultBundle:
    """
    Test signals are properly "dominated".

    The integral of `util_est_enqueued` is expected to be always greater or
    equal than the integral of `util_avg`, since this `util_avg` is subject
    to decays while `util_est_enqueued` not.

    On fast-ramp systems, the `util_est_ewma` signal is never smaller than
    the `util_est_enqueued`, thus its integral is expected to be bigger.

    On non fast-ramp systems instead, the `util_est_ewma` is expected to be
    smaller than `util_est_enqueued` in ramp-up phases, or bigger in
    ramp-down phases.

    Those conditions are checked on a single execution of a task which has
    three main behaviours:

        * STABLE: periodic big task running for a relatively long period to
          ensure `util_avg` saturation.
        * DOWN: periodic ramp-down task, to slowly decay `util_avg`
        * UP: periodic ramp-up task, to slowly increase `util_avg`

    :returns: A bundle that passes when no phase is faulty. It carries the
        ``fast ramp`` and ``phases stats`` metrics, plus the
        ``failure reasons`` metric when at least one phase failed.
    """
    failure_reasons = {}
    metrics = {}

    # We have only two task: the main 'rt-app' task and our 'test_task'
    test_task = self.trace.analysis.rta.rtapp_tasks[-1]

    ue_df = self.trace.df_events('sched_util_est_task')
    ue_df = df_filter_task_ids(ue_df, [test_task])
    ua_df = self.trace.analysis.load_tracking.df_tasks_signal('util')
    ua_df = df_filter_task_ids(ua_df, [test_task])

    failures = []
    for phase in self.trace.analysis.rta.task_phase_windows(test_task):
        # Integrate each signal over the phase window
        phase_df = ue_df[phase.start:phase.end]
        area_enqueued = series_integrate(phase_df.util_est_enqueued)
        area_ewma = series_integrate(phase_df.util_est_ewma)

        phase_df = ua_df[phase.start:phase.end]
        area_util = series_integrate(phase_df.util)

        metrics[phase.id] = PhaseStats(phase.start, phase.end,
                                       area_util, area_enqueued, area_ewma)

        # NOTE(review): the FAST_RAMP/NO_FAST_RAMP prefixes of the reasons
        # below look swapped with respect to the branch they are emitted
        # from. They are kept as-is since they are part of the reported
        # metrics - confirm the intent.
        reason = None

        if area_enqueued < area_util:
            reason = 'Enqueued smaller then Util Average'

        # Running on FastRamp kernels:
        elif self.fast_ramp:
            # STABLE, DOWN and UP:
            if area_ewma < area_enqueued:
                reason = 'NO_FAST_RAMP: EWMA smaller then Enqueued'

        # Running on (legacy) non FastRamp kernels:

        # STABLE: ewma ramping up
        elif phase.id == 0 and area_ewma > area_enqueued:
            reason = 'FAST_RAMP(STABLE): EWMA bigger then Enqueued'

        # DOWN: ewma ramping down
        elif 0 < phase.id < 5 and area_ewma < area_enqueued:
            reason = 'FAST_RAMP(DOWN): EWMA smaller then Enqueued'

        # UP: ewma ramping up
        elif phase.id > 4 and area_ewma > area_enqueued:
            reason = 'FAST_RAMP(UP): EWMA bigger then Enqueued'

        if reason is not None:
            failure_reasons["phase {}".format(phase.id)] = reason
            failures.append(phase.start)

    # The test passes only when there is no failure to report
    bundle = ResultBundle.from_bool(not failure_reasons)
    bundle.add_metric("fast ramp", self.fast_ramp)
    bundle.add_metric("phases stats", metrics)

    if not failure_reasons:
        return bundle

    # Plot signals to support debugging analysis
    self._plot_signals(test_task, 'areas', failures)
    bundle.add_metric("failure reasons", failure_reasons)

    return bundle
def test_activations(self) -> ResultBundle:
    """
    Test signals are properly "aggregated" at enqueue/dequeue time.

    On fast-ramp systems, `enqueued` is expected to be always smaller than
    `ewma`.

    On non fast-ramp systems, the `enqueued` is expected to be smaller than
    `ewma` in ramp-down phases, or bigger in ramp-up phases.

    Those conditions are checked on a single execution of a task which has
    three main behaviours:

        * STABLE: periodic big task running for a relatively long period to
          ensure `util` saturation.
        * DOWN: periodic ramp-down task, to slowly decay `util`
        * UP: periodic ramp-up task, to slowly increase `util`

    :returns: A bundle that passes when no recorded activation has an
        issue. The ``failures`` metric is the sorted list of faulty
        activation indices, and the ``activations`` metric maps each
        recorded activation index to its signals.
    """
    metrics = {}
    task = self.rtapp_task_ids_map['test'][0]

    # Get list of task's activations
    df = self.trace.ana.tasks.df_task_states(task)
    activations = df[
        (df.curr_state == TaskState.TASK_WAKING) &
        (df.next_state == TaskState.TASK_ACTIVE)
    ].index

    # Check task signals at each activation
    df = self.trace.df_event('sched_util_est_se')
    df = df_filter_task_ids(df, [task])

    for idx, activation in enumerate(activations):
        # Get the value of signals at their first update after the activation
        row = df_window(df, (activation, None), method='post').iloc[0]

        # It can happen that the first updated after the activation is
        # actually in the next phase, in which case we need to check the
        # util values against the right phase
        activation = row.name

        # If we are outside a phase, ignore the activation. Note that the
        # activation is then not recorded in ``metrics`` at all.
        try:
            phase = self.trace.ana.rta.task_phase_at(
                task, activation, wlgen_profile=self.rtapp_profile)
        except KeyError:
            continue

        util = row['util']
        enq = row['enqueued']
        ewma = row['ewma']

        # Fill the "{util}", "{enq}" and "{ewma}" placeholders of a message
        # with the name and value of the matching signal
        def make_issue(msg):
            return msg.format(
                util=f'util={util}',
                enq=f'enqueued={enq}',
                ewma=f'ewma={ewma}',
            )

        # Stays None when the activation is fine
        issue = None

        # UtilEst is not updated when within 1% of previous activation
        if 1.01 * enq < util:
            issue = make_issue('{enq} smaller than {util}')

        # Running on FastRamp kernels:
        elif self.fast_ramp:

            # ewma stable, down and up
            if enq > ewma:
                issue = make_issue('{enq} bigger than {ewma}')

        # Running on (legacy) non FastRamp kernels:
        else:

            # Phases not flagged as coming from the test are skipped, and
            # the activation is not recorded in ``metrics``
            if not phase.properties['meta']['from_test']:
                continue

            # ewma stable
            if phase.id.startswith('test/stable'):
                if enq < ewma:
                    issue = make_issue('stable: {enq} smaller than {ewma}')

            # ewma ramping down
            elif phase.id.startswith('test/ramp_down'):
                if enq > ewma:
                    issue = make_issue(
                        'ramp down: {enq} bigger than {ewma}')

            # ewma ramping up
            elif phase.id.startswith('test/ramp_up'):
                if enq < ewma:
                    issue = make_issue(
                        'ramp up: {enq} smaller than {ewma}')

        metrics[idx] = ActivationSignals(activation, util, enq, ewma, issue)

    # Activations for which an issue has been recorded
    failures = [
        (idx, activation_signals)
        for idx, activation_signals in metrics.items()
        if activation_signals.issue
    ]

    bundle = ResultBundle.from_bool(not failures)
    bundle.add_metric("failures", sorted(idx for idx, activation in failures))
    bundle.add_metric("activations", metrics)

    # Timestamps of the faulty activations, passed on to the plot
    failures_time = [activation.time for idx, activation in failures]
    self._plot_signals(task, 'activations', failures_time)
    return bundle
def test_means(self) -> ResultBundle:
    """
    Test signals are properly "dominated".

    The mean of `enqueued` is expected to be always greater than or equal
    to the mean of `util`, since `util` is subject to decays while
    `enqueued` is not.

    On fast-ramp systems, the `ewma` signal is never smaller than
    `enqueued`, thus its mean is expected to be bigger.

    On non fast-ramp systems instead, the `ewma` is expected to be smaller
    than `enqueued` in ramp-up phases, or bigger in ramp-down phases.

    Those conditions are checked on a single execution of a task which has
    three main behaviours:

        * STABLE: periodic big task running for a relatively long period to
          ensure `util` saturation.
        * DOWN: periodic ramp-down task, to slowly decay `util`
        * UP: periodic ramp-up task, to slowly increase `util`

    The returned bundle carries the per-phase statistics under the
    ``phases`` metric, the ids of the offending phases under ``failures``
    and the value of ``self.fast_ramp`` under ``fast ramp``.
    """
    metrics = {}
    task = self.rtapp_task_ids_map['test'][0]

    ue_df = self.trace.df_event('sched_util_est_se')
    ue_df = df_filter_task_ids(ue_df, [task])
    ua_df = self.trace.ana.load_tracking.df_task_signal(task, 'util')

    phases = self.trace.ana.rta.task_phase_windows(
        task, wlgen_profile=self.rtapp_profile)

    for phase in phases:
        # Skip phases that are not flagged as "from_test" in their metadata
        if not phase.properties['meta']['from_test']:
            continue

        # Both dataframes are refitted on the boundaries of the phase
        # before their signals are averaged.
        window = (phase.start, phase.end)

        ue_phase_df = df_refit_index(ue_df, window=window)
        mean_enqueued = series_mean(ue_phase_df['enqueued'])
        mean_ewma = series_mean(ue_phase_df['ewma'])

        ua_phase_df = df_refit_index(ua_df, window=window)
        mean_util = series_mean(ua_phase_df['util'])

        # NOTE(review): the "no fast ramp" / "fast ramp" message prefixes
        # look like they name the behaviour that was observed rather than
        # the kernel flavour being tested -- confirm before rewording.
        template = None
        if mean_enqueued < mean_util:
            template = '{enq} smaller than {util}'

        # Running on FastRamp kernels: STABLE, DOWN and UP share one rule
        elif self.fast_ramp:
            if mean_ewma < mean_enqueued:
                template = 'no fast ramp: {ewma} smaller than {enq}'

        # Running on (legacy) non FastRamp kernels:
        else:
            # STABLE: ewma ramping up
            if phase.id.startswith('test/stable'):
                if mean_ewma > mean_enqueued:
                    template = 'fast ramp, stable: {ewma} bigger than {enq}'

            # DOWN: ewma ramping down
            elif phase.id.startswith('test/ramp_down'):
                if mean_ewma < mean_enqueued:
                    template = 'fast ramp, down: {ewma} smaller than {enq}'

            # UP: ewma ramping up
            elif phase.id.startswith('test/ramp_up'):
                if mean_ewma > mean_enqueued:
                    template = 'fast ramp, up: {ewma} bigger than {enq}'

        issue = None
        if template is not None:
            issue = template.format(
                util=f'util={mean_util}',
                enq=f'enqueued={mean_enqueued}',
                ewma=f'ewma={mean_ewma}',
            )

        metrics[phase.id] = PhaseStats(
            phase.start, phase.end,
            mean_util, mean_enqueued, mean_ewma,
            issue,
        )

    failures = [
        (phase_id, stat)
        for phase_id, stat in metrics.items()
        if stat.issue
    ]

    # Plot signals to support debugging analysis
    self._plot_signals(
        task,
        'means',
        sorted(stat.start for phase_id, stat in failures),
    )

    bundle = ResultBundle.from_bool(not failures)
    bundle.add_metric("fast ramp", self.fast_ramp)
    bundle.add_metric("phases", metrics)
    bundle.add_metric("failures", sorted(phase_id for phase_id, stat in failures))
    return bundle
def test_freq_selection(self) -> ResultBundle:
    """
    For each phase, checks if the task placement and frequency selection
    is compatible with UtilClamp requirements. This is done by comparing
    the current CPU capacity on which the task has been placed, with the
    UtilClamp value.

    The expected capacity is the schedutil projected frequency selection
    for the given uclamp value.

    The returned bundle passes when no phase has any activation running at
    a capacity different from the expected one. Per-phase details are
    stored under the ``Phases`` metric.
    """
    metrics = {}
    test_failures = []
    capacity_dfs = []

    # For each CPU, mapping of the available frequencies to the matching
    # capacity. Computed once and shared by both lookups below.
    cpu_capacities = self._collect_capacities(self.plat_info)

    # For each CPU:
    # (
    #   # schedutil factor that converts util to a frequency for a
    #   # given CPU:
    #   #
    #   #   next_freq = max_freq * C * util / max_cap
    #   #
    #   # where C = 1 / CAPACITY_MARGIN (documented as 1.25)
    #   schedutil_factor,
    #
    #   # list of frequencies available for a given CPU, in ascending order.
    #   frequencies,
    # )
    cpu_frequencies = {
        cpu: (
            (max(capacities) * (1 / self.CAPACITY_MARGIN)) / max(capacities.values()),
            sorted(capacities),
        )
        for cpu, capacities in cpu_capacities.items()
    }

    def schedutil_map_util_cap(cpu, util):
        """
        Returns, for a given util on a given CPU, the capacity that
        schedutil would select.
        """
        schedutil_factor, frequencies = cpu_frequencies[cpu]
        schedutil_freq = schedutil_factor * util

        # First available freq that meets the schedutil freq requirement.
        # When the request is above every available frequency, the highest
        # one is used.
        freq = next(
            (freq for freq in frequencies if freq >= schedutil_freq),
            frequencies[-1],
        )
        return cpu_capacities[cpu][freq]

    def parse_phase(df, phase):
        """
        Record the metrics of one phase and return ``True`` when all of its
        activations ran at the expected capacity.
        """
        uclamp_val = phase['uclamp_val']
        num_activations = df['active'][df['active'] == 1].count()

        # The expected capacity is derived from the first CPU appearing in
        # the phase dataframe.
        expected = schedutil_map_util_cap(df['cpu'].unique()[0], uclamp_val)

        # Activations numbering
        df['activation'] = df['active'].cumsum()

        # Only keep the activations. The explicit copy makes the column
        # assignments below operate on an independent dataframe rather
        # than on the result of a filtering operation.
        df.ffill(inplace=True)
        df = df[df['active'] == 1].copy()

        # Actual capacity at which the task is running: the column of each
        # CPU is converted from frequency to capacity, then the column of
        # the CPU the task runs on is selected for each row.
        for cpu, freq_to_capa in cpu_capacities.items():
            df[cpu] = df[cpu].map(freq_to_capa)
        df['capacity'] = df.apply(lambda line: line[line.cpu], axis=1)

        failures = df[df['capacity'] != expected]
        num_failures = failures['activation'].nunique()

        test_failures.extend(failures.index.tolist())
        capacity_dfs.append(df[['capacity']])

        phase_str = f"Phase-{phase['phase']}"
        metrics[phase_str] = {
            'uclamp-min': TestMetric(uclamp_val),
            'expected-capacity': TestMetric(expected),
            'bad-activations': TestMetric(
                num_failures * 100 / num_activations, "%"),
        }

        return failures.empty

    res = ResultBundle.from_bool(self._for_each_phase(parse_phase).all())
    res.add_metric('Phases', metrics)

    self._plot_phases('test_frequency', test_failures, pd.concat(capacity_dfs))

    return res
def test_target_alive(self) -> ResultBundle:
    """
    Check that the target is still usable after the hotplug operations.

    The result passes when ``self.target_alive`` is true.
    """
    still_alive = self.target_alive
    return ResultBundle.from_bool(still_alive)