Example 1
    def df_task_states(self, task, stringify=False):
        """
        DataFrame of task's state updates events

        :param task: The task's name or PID or tuple ``(pid, comm)``
        :type task: int or str or tuple(int, str)

        :param stringify: Include stringified :class:`TaskState` columns
        :type stringify: bool

        :returns: a :class:`pandas.DataFrame` with:

          * A ``cpu`` column (the CPU the task was running on)
          * A ``target_cpu`` column (the CPU where the task has been scheduled).
            Will be ``NaN`` for non-wakeup events
          * A ``curr_state`` column (the current task state, see :class:`~TaskState`)
          * A ``next_state`` column (the next task state)
          * A ``delta`` column (the duration for which the task will remain in
            this state)
        """
        task_id = self.trace.get_task_id(task, update=False)
        df = self.df_tasks_states()

        df = df_filter_task_ids(df, [task_id])
        df = df.drop(columns=["pid", "comm"])

        if stringify:
            self.stringify_df_task_states(df, ["curr_state", "next_state"], inplace=True)

        return df
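A minimal usage sketch (not part of the source): assuming `trace` is an already-loaded `lisa.trace.Trace` and a task named "task0-0" exists in it, the per-state residency can be summed from the `delta` column:

# Hypothetical driver code: `trace` and the task name are assumptions.
states_df = trace.analysis.tasks.df_task_states("task0-0")

# Total time spent in each scheduler state, in seconds
print(states_df.groupby("curr_state")["delta"].sum())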
Example 2
 def check_task_util(task_id):
     # Only keep the data about the tasks we care about.
     _df = df_filter_task_ids(df, [task_id])
     avg = _df['util'].mean()
     util_means[task_id.comm] = avg
     # Util is not supposed to be higher than 512 given what we asked for in get_rtapp_profile()
     return avg < (512 + util_margin)
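A sketch of how this helper might be driven, assuming the free variables it closes over (`df`, `util_means`, `util_margin`) are set up roughly as below; the task names are hypothetical:

# All names here are assumptions, mirroring the closure above.
util_margin = 20     # tolerated overshoot, in util units
util_means = {}      # filled in by check_task_util

df = trace.analysis.load_tracking.df_tasks_signal('util')

task_ids = [trace.get_task_id(name, update=False)
            for name in ('small-0', 'small-1')]
passed = all(map(check_task_util, task_ids))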
Example 3
    def plot_task_signals(self,
                          task,
                          axis,
                          local_fig,
                          signals=['util', 'load']):
        """
        Plot the task-related load-tracking signals

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple

        :param signals: List of signals to plot.
        :type signals: list(str)
        """
        task_id = self.trace.get_task_id(task, update=False)
        start = self.trace.start
        end = self.trace.end

        for signal in signals:
            df = self.df_tasks_signal(signal)
            df = df_filter_task_ids(df, [task_id])
            df = df_refit_index(df, start, end)
            df[signal].plot(ax=axis, drawstyle='steps-post', alpha=0.4)

        plot_overutilized = self.trace.analysis.status.plot_overutilized
        if self.trace.has_events(plot_overutilized.used_events):
            plot_overutilized(axis=axis)

        axis.set_title('Load-tracking signals of task "{}"'.format(task))
        axis.legend()
        axis.grid(True)
        axis.set_xlim(start, end)
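A usage sketch for this matplotlib-based method (the figure setup, analysis proxy path and task name are assumptions, not from the source):

import matplotlib.pyplot as plt

# Hypothetical driver code: `trace` is assumed to be already loaded.
fig, axis = plt.subplots(figsize=(16, 4))
trace.analysis.load_tracking.plot_task_signals(
    "task0-0", axis=axis, local_fig=True, signals=['util'])
plt.show()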
Example 4
    def plot_task_required_capacity(self, task: TaskID, axis=None, **kwargs):
        """
        Plot the minimum required capacity of a task

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple
        """
        window = self.trace.window

        task_ids = self.trace.get_task_ids(task)
        df = self.df_tasks_signal('required_capacity')
        df = df_filter_task_ids(df, task_ids)
        df = df_refit_index(df, window=window)

        # Build the task name (there could be several task IDs over the task's lifetime)
        task_name = f"Task ({', '.join(map(str, task_ids))})"

        def plotter(axis, local_fig):
            df["required_capacity"].plot(drawstyle='steps-post', ax=axis)

            axis.legend()
            axis.grid(True)

            if local_fig:
                axis.set_title(task_name)
                axis.set_ylim(0, 1100)
                axis.set_ylabel('Utilization')
                axis.set_xlabel('Time (s)')

        return self.do_plot(plotter, height=8, axis=axis, **kwargs)
Example 5
    def _task_filtered(self, df, task=None):
        if not task:
            return df

        task = self.trace.get_task_id(task)

        if task not in self.rtapp_tasks:
            raise ValueError(f"Task [{task}] is not an rt-app task: {self.rtapp_tasks}")

        return df_filter_task_ids(df, [task],
                                  pid_col='__pid', comm_col='__comm')
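All of these examples revolve around `df_filter_task_ids`. As a self-contained illustration of the idea (a simplified re-implementation for clarity, not LISA's actual code), a row matches a `TaskID` when every non-None field of the ID agrees with it, and the PID/comm column names can be overridden as in the example above:

import pandas as pd
from collections import namedtuple

TaskID = namedtuple('TaskID', ['pid', 'comm'])

def filter_task_ids(df, task_ids, pid_col='pid', comm_col='comm'):
    # Keep rows matching any of the given TaskIDs; a None field in a
    # TaskID acts as a wildcard for that column.
    keep = pd.Series(False, index=df.index)
    for task_id in task_ids:
        match = pd.Series(True, index=df.index)
        if task_id.pid is not None:
            match &= df[pid_col] == task_id.pid
        if task_id.comm is not None:
            match &= df[comm_col] == task_id.comm
        keep |= match
    return df[keep]

df = pd.DataFrame({'next_pid': [1, 2, 2], 'next_comm': ['a', 'b', 'b2']})
# Keep every row of PID 2, whatever its comm was at the time
print(filter_task_ids(df, [TaskID(pid=2, comm=None)],
                      pid_col='next_pid', comm_col='next_comm'))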
Example 6
    def get_simulated_pelt(self, task, signal_name):
        """
        Simulate a PELT signal for a given task.

        :param task: task to look for in the trace.
        :type task: int or str or tuple(int, str)

        :param signal_name: Name of the PELT signal to simulate.
        :type signal_name: str

        :return: A :class:`pandas.DataFrame` with a ``simulated`` column
            containing the simulated signal, along with the column of the
            signal as found in the trace.
        """
        logger = self.get_logger()
        trace = self.trace
        task = trace.get_task_id(task)
        cpus = trace.analysis.tasks.cpus_of_tasks([task])

        # Capacity lower than 1024 will create some time-scaling artifacts that
        # are not currently simulated
        assert all(
            self.plat_info["cpu-capacities"][cpu] == UTIL_SCALE
            for cpu in cpus
        )

        df_activation = trace.analysis.tasks.df_task_activation(task)
        df = trace.analysis.load_tracking.df_tasks_signal(signal_name)
        df = df_filter_task_ids(df, [task])

        # Ignore the first activation, as its signals are incorrect
        df_activation = df_activation.iloc[2:]

        # Make sure the activation df does not start before the dataframe of
        # signal values, otherwise we cannot provide a sensible init value
        df_activation = df_activation[df.index[0]:]

        # Get the initial signal value matching the first activation we will care about
        init_iloc = df.index.get_loc(df_activation.index[0], method='ffill')
        init = df[signal_name].iloc[init_iloc]

        try:
            # PELT clock: update_time is in nanoseconds, convert to seconds
            clock = df['update_time'] * 1e-9
        except KeyError:
            logger.warning('PELT clock is not available, ftrace timestamp will be used at the expense of accuracy')
            clock = None

        df['simulated'] = simulate_pelt(df_activation['active'], index=df.index, init=init, clock=clock)
        df['error'] = df[signal_name] - df['simulated']

        df = df.dropna()
        return df
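A sketch of how the returned dataframe might be checked in a test (the instance setup, task name and error bound are all assumptions):

# Hypothetical usage: `self` is assumed to be a set-up test instance.
df = self.get_simulated_pelt('task0-0', 'util')

# Compare the simulated signal against the one recorded in the trace
mean_error = df['error'].abs().mean()
assert mean_error < 15, f"PELT simulation off by {mean_error} on average"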
Example 7
    def df_task_signal(self, task, signal):
        """
        Same as :meth:`df_tasks_signal` but for one task only.

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple

        :param signal: See :meth:`df_tasks_signal`.
        """
        task_id = self.trace.get_task_id(task, update=False)
        df = self.df_tasks_signal(signal=signal)
        return df_filter_task_ids(df, [task_id])
Example 8
    def cpus_of_tasks(self, tasks):
        """
        Return the list of CPUs where the ``tasks`` executed.

        :param tasks: Task names or PIDs or ``(pid, comm)`` to look for.
        :type tasks: list(int or str or tuple(int, str))
        """
        trace = self.trace
        df = trace.df_event('sched_switch')[['next_pid', 'next_comm', '__cpu']]

        task_ids = [trace.get_task_id(task, update=False) for task in tasks]
        df = df_filter_task_ids(df, task_ids, pid_col='next_pid', comm_col='next_comm')
        cpus = df['__cpu'].unique()

        return sorted(cpus)
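A usage sketch (the task names are hypothetical):

# Hypothetical driver code: `trace` is assumed to be already loaded.
cpus = trace.analysis.tasks.cpus_of_tasks(['task0-0', 'task1-0'])
print(f"The tasks were scheduled on CPUs: {cpus}")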
Example 9
 def get_first_switch(row):
     comm, pid, _ = row.name
     start_time = row['Time']
     task = TaskID(comm=comm, pid=pid)
     start_swdf = df_filter_task_ids(swdf, [task],
                                     pid_col='next_pid',
                                     comm_col='next_comm')
     pre_phase_swdf = start_swdf[start_swdf.index < start_time]
     # The task with that comm and PID was never switched-in, which
     # means it was still on the current CPU when it was renamed, so we
     # just report phase-start.
     if pre_phase_swdf.empty:
         return start_time
     # Otherwise, we return the timestamp of the switch
     else:
         return pre_phase_swdf.index[-1]
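A sketch of how this row-wise helper might be applied; the layout of `phases_df` is an assumption, inferred from the `(comm, pid, _)` unpacking of `row.name` above:

# `swdf` is the sched_switch dataframe the closure captures
swdf = trace.df_event('sched_switch')

# `phases_df` is assumed to be indexed by (comm, pid, phase) tuples,
# with a 'Time' column holding each phase start
first_switch = phases_df.apply(get_first_switch, axis=1)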
Example 10
    def plot_task_residency(self, task: TaskID, axis, local_fig):
        """
        Plot which CPUs the task ran on over time

        :param task: Task to track
        :type task: int or str or tuple(int, str)
        """

        task_id = self.trace.get_task_id(task, update=False)

        sw_df = self.trace.df_event("sched_switch")
        sw_df = df_filter_task_ids(sw_df, [task_id],
                                   pid_col='next_pid',
                                   comm_col='next_comm')

        if "freq-domains" in self.trace.plat_info:
            # If we are aware of frequency domains, use one color per domain
            for domain in self.trace.plat_info["freq-domains"]:
                series = sw_df[sw_df["__cpu"].isin(domain)]["__cpu"]

                if series.empty:
                    # Cycle the colours to stay consistent
                    self.cycle_colors(axis)
                else:
                    series = series_refit_index(series,
                                                window=self.trace.window)
                    series.plot(
                        ax=axis,
                        style='+',
                        label="Task running in domain {}".format(domain))
        else:
            series = series_refit_index(sw_df['__cpu'],
                                        window=self.trace.window)
            series.plot(ax=axis, style='+')

        plot_overutilized = self.trace.analysis.status.plot_overutilized
        if self.trace.has_events(plot_overutilized.used_events):
            plot_overutilized(axis=axis)

        # Add an extra CPU lane to make room for the legend
        axis.set_ylim(-0.95, self.trace.cpus_count - 0.05)

        axis.set_title("CPU residency of task \"{}\"".format(task))
        axis.set_ylabel('CPUs')
        axis.grid(True)
        axis.legend()
Example 11
    def plot_task_placement(self, task, axis, local_fig):
        """
        Plot the CPU placement of the task

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple
        """

        # Get all utilization update events
        df = self.df_tasks_signal('required_capacity')

        task_id = self.trace.get_task_id(task, update=False)
        df = df_filter_task_ids(df, [task_id])

        cpu_capacities = self.trace.plat_info["cpu-capacities"]

        def evaluate_placement(cpu, required_capacity):
            capacity = cpu_capacities[cpu]

            if capacity < required_capacity:
                return "CPU capacity < required capacity"
            elif capacity == required_capacity:
                return "CPU capacity == required capacity"
            else:
                return "CPU capacity > required capacity"

        df["placement"] = df.apply(lambda row: evaluate_placement(
            row["cpu"], row["required_capacity"]),
                                   axis=1)

        for stat in df["placement"].unique():
            df[df.placement == stat]["cpu"].plot(ax=axis,
                                                 style="+",
                                                 label=stat)

        plot_overutilized = self.trace.analysis.status.plot_overutilized
        if self.trace.has_events(plot_overutilized.used_events):
            plot_overutilized(axis=axis)

        axis.set_title("Utilization vs placement of task \"{}\"".format(task))

        axis.set_xlim(self.trace.start, self.trace.end)
        axis.grid(True)
        axis.legend()
Example 12
    def plot_task_residency(self, task: TaskID):
        """
        Plot which CPUs the task ran on over time

        :param task: Task to track
        :type task: int or str or tuple(int, str)
        """
        task_id = self.trace.get_task_id(task, update=False)

        sw_df = self.trace.df_event("sched_switch")
        sw_df = df_filter_task_ids(sw_df, [task_id], pid_col='next_pid', comm_col='next_comm')

        def plot_residency():
            if "freq-domains" in self.trace.plat_info:
                # If we are aware of frequency domains, use one color per domain
                plot = _hv_neutral()
                for domain in self.trace.plat_info["freq-domains"]:
                    series = sw_df[sw_df["__cpu"].isin(domain)]["__cpu"]
                    series = series_refit_index(series, window=self.trace.window)

                    if not series.empty:
                        # Overlay all domains on the same figure
                        plot = plot * self._plot_markers(
                            series,
                            label=f"Task running in domain {domain}"
                        )
                return plot
            else:
                return self._plot_markers(
                    series_refit_index(sw_df['__cpu'], window=self.trace.window)
                )

        return (
            plot_residency().options(ylabel='cpu') *
            self._plot_overutilized()
        ).options(
            title=f'CPU residency of task {task}'
        )
Example 13
    def plot_task_required_capacity(self, task: TaskID):
        """
        Plot the minimum required capacity of a task

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple
        """
        window = self.trace.window

        task_ids = self.trace.get_task_ids(task)
        df = self.df_tasks_signal('required_capacity')
        df = df_filter_task_ids(df, task_ids)
        df = df_refit_index(df, window=window)

        # Build the task name (there could be several task IDs over the task's lifetime)
        task_name = f"Task ({', '.join(map(str, task_ids))})"

        return plot_signal(
            df['required_capacity'],
            name='required_capacity',
        ).options(
            title=f'Required CPU capacity for task {task}',
            ylabel='Utilization',
        )
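Unlike the matplotlib variants earlier, the holoviews-based methods return a figure object instead of drawing on a caller-provided axis. A usage sketch (the analysis proxy path and task name are assumptions):

# Hypothetical notebook usage
fig = trace.ana.load_tracking.plot_task_required_capacity('task0-0')
fig  # a holoviews object, rendered inline by the notebook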
Example 14
    def test_activations(self) -> ResultBundle:
        """
        Test signals are properly "aggregated" at enqueue/dequeue time.

        On fast-ramp systems, `util_est_enqueued` is expected to always be
        smaller than `util_est_ewma`.

        On non fast-ramp systems, `util_est_enqueued` is expected to be
        smaller than `util_est_ewma` in ramp-down phases, and bigger in
        ramp-up phases.

        Those conditions are checked on a single execution of a task which has
        three main behaviours:

            * STABLE: periodic big task running for a relatively long period to
              ensure `util_avg` saturation.
            * DOWN: periodic ramp-down task, to slowly decay `util_avg`
            * UP: periodic ramp-up task, to slowly increase `util_avg`

        """
        failure_reasons = {}
        metrics = {}

        # We have only two tasks: the main 'rt-app' task and our 'test_task'
        test_task = self.trace.analysis.rta.rtapp_tasks[-1]

        # Get list of task's activations
        df = self.trace.analysis.tasks.df_task_states(test_task)
        activations = df[(df.curr_state == TaskState.TASK_WAKING)
                         & (df.next_state == TaskState.TASK_ACTIVE)].index

        # Check task signals at each activation
        df = self.trace.df_events('sched_util_est_task')
        df = df_filter_task_ids(df, [test_task])

        # Define a time interval to correlate relative trace events.
        def restrict(df, time, delta=1e-3):
            return df[time - delta:time + delta]

        failures = []
        for idx, activation in enumerate(activations):
            avg, enq, ewma = restrict(df, activation)[[
                'util_avg', 'util_est_enqueued', 'util_est_ewma'
            ]].iloc[-1]

            metrics[idx + 1] = ActivationSignals(activation, avg, enq, ewma)

            # UtilEst is not updated when within 1% of previous activation
            if 1.01 * enq < avg:
                failure_reasons[idx] = 'enqueued({}) smaller than util_avg({}) @ {}'\
                    .format(enq, avg, activation)
                failures.append(activation)
                continue

            # Running on FastRamp kernels:
            if self.fast_ramp:

                # STABLE, DOWN and UP:
                if enq > ewma:
                    failure_reasons[idx] = 'enqueued({}) bigger than ewma({}) @ {}'\
                        .format(enq, ewma, activation)
                    failures.append(activation)
                    continue

            # Running on (legacy) non FastRamp kernels:
            else:

                phase = self.trace.analysis.rta.task_phase_at(
                    test_task, activation)

                # STABLE: ewma ramping up
                if phase.id == 0 and enq < ewma:
                    failure_reasons[idx] = 'enqueued({}) smaller than ewma({}) @ {}'\
                        .format(enq, ewma, activation)
                    failures.append(activation)
                    continue

                # DOWN: ewma ramping down
                if 0 < phase.id < 5 and enq > ewma:
                    failure_reasons[idx] = 'enqueued({}) bigger than ewma({}) @ {}'\
                        .format(enq, ewma, activation)
                    failures.append(activation)
                    continue

                # UP: ewma ramping up
                if phase.id > 4 and enq < ewma:
                    failure_reasons[idx] = 'enqueued({}) smaller than ewma({}) @ {}'\
                        .format(enq, ewma, activation)
                    failures.append(activation)
                    continue

        self._plot_signals(test_task, 'activations', failures)

        bundle = ResultBundle.from_bool(not failure_reasons)
        bundle.add_metric("signals", metrics)
        bundle.add_metric("failure reasons", failure_reasons)
        return bundle
Example 15
    def test_areas(self) -> ResultBundle:
        """
        Test signals are properly "dominated".

        The integral of `util_est_enqueued` is expected to be greater than
        or equal to that of `util_avg`, since `util_avg` is subject to
        decays while `util_est_enqueued` is not.

        On fast-ramp systems, the `util_est_ewma` signal is never smaller
        than `util_est_enqueued`, so its integral is expected to be bigger.

        On non fast-ramp systems instead, `util_est_ewma` is expected to be
        smaller than `util_est_enqueued` in ramp-up phases, and bigger in
        ramp-down phases.

        Those conditions are checked on a single execution of a task which has
        three main behaviours:

            * STABLE: periodic big task running for a relatively long period to
              ensure `util_avg` saturation.
            * DOWN: periodic ramp-down task, to slowly decay `util_avg`
            * UP: periodic ramp-up task, to slowly increase `util_avg`

        """
        failure_reasons = {}
        metrics = {}

        # We have only two tasks: the main 'rt-app' task and our 'test_task'
        test_task = self.trace.analysis.rta.rtapp_tasks[-1]

        ue_df = self.trace.df_events('sched_util_est_task')
        ue_df = df_filter_task_ids(ue_df, [test_task])
        ua_df = self.trace.analysis.load_tracking.df_tasks_signal('util')
        ua_df = df_filter_task_ids(ua_df, [test_task])

        failures = []
        for phase in self.trace.analysis.rta.task_phase_windows(test_task):
            phase_df = ue_df[phase.start:phase.end]
            area_enqueued = series_integrate(phase_df.util_est_enqueued)
            area_ewma = series_integrate(phase_df.util_est_ewma)

            phase_df = ua_df[phase.start:phase.end]
            area_util = series_integrate(phase_df.util)

            metrics[phase.id] = PhaseStats(phase.start, phase.end, area_util,
                                           area_enqueued, area_ewma)

            phase_name = "phase {}".format(phase.id)
            if area_enqueued < area_util:
                failure_reasons[
                    phase_name] = 'Enqueued smaller than Util Average'
                failures.append(phase.start)
                continue

            # Running on FastRamp kernels:
            if self.fast_ramp:

                # STABLE, DOWN and UP:
                if area_ewma < area_enqueued:
                    failure_reasons[
                        phase_name] = 'NO_FAST_RAMP: EWMA smaller than Enqueued'
                    failures.append(phase.start)
                    continue

            # Running on (legacy) non FastRamp kernels:
            else:

                # STABLE: ewma ramping up
                if phase.id == 0 and area_ewma > area_enqueued:
                    failure_reasons[
                        phase_name] = 'FAST_RAMP(STABLE): EWMA bigger than Enqueued'
                    failures.append(phase.start)
                    continue

                # DOWN: ewma ramping down
                if 0 < phase.id < 5 and area_ewma < area_enqueued:
                    failure_reasons[
                        phase_name] = 'FAST_RAMP(DOWN): EWMA smaller than Enqueued'
                    failures.append(phase.start)
                    continue

                # UP: ewma ramping up
                if phase.id > 4 and area_ewma > area_enqueued:
                    failure_reasons[
                        phase_name] = 'FAST_RAMP(UP): EWMA bigger than Enqueued'
                    failures.append(phase.start)
                    continue

        bundle = ResultBundle.from_bool(not failure_reasons)
        bundle.add_metric("fast ramp", self.fast_ramp)
        bundle.add_metric("phases stats", metrics)
        if not failure_reasons:
            return bundle

        # Plot signals to support debugging analysis
        self._plot_signals(test_task, 'areas', failures)
        bundle.add_metric("failure reasons", failure_reasons)

        return bundle
Example 16
    def test_activations(self) -> ResultBundle:
        """
        Test signals are properly "aggregated" at enqueue/dequeue time.

        On fast-ramp systems, `enqueued` is expected to always be
        smaller than `ewma`.

        On non fast-ramp systems, `enqueued` is expected to be
        smaller than `ewma` in ramp-down phases, and bigger in ramp-up
        phases.

        Those conditions are checked on a single execution of a task which has
        three main behaviours:

            * STABLE: periodic big task running for a relatively long period to
              ensure `util` saturation.
            * DOWN: periodic ramp-down task, to slowly decay `util`
            * UP: periodic ramp-up task, to slowly increase `util`

        """
        metrics = {}
        task = self.rtapp_task_ids_map['test'][0]

        # Get list of task's activations
        df = self.trace.ana.tasks.df_task_states(task)
        activations = df[(df.curr_state == TaskState.TASK_WAKING)
                         & (df.next_state == TaskState.TASK_ACTIVE)].index

        # Check task signals at each activation
        df = self.trace.df_event('sched_util_est_se')
        df = df_filter_task_ids(df, [task])

        for idx, activation in enumerate(activations):

            # Get the value of signals at their first update after the activation
            row = df_window(df, (activation, None), method='post').iloc[0]
            # It can happen that the first update after the activation is
            # actually in the next phase, in which case we need to check the
            # util values against the right phase
            activation = row.name

            # If we are outside a phase, ignore the activation
            try:
                phase = self.trace.ana.rta.task_phase_at(
                    task, activation, wlgen_profile=self.rtapp_profile)
            except KeyError:
                continue

            util = row['util']
            enq = row['enqueued']
            ewma = row['ewma']

            def make_issue(msg):
                return msg.format(
                    util=f'util={util}',
                    enq=f'enqueued={enq}',
                    ewma=f'ewma={ewma}',
                )

            issue = None

            # UtilEst is not updated when within 1% of previous activation
            if 1.01 * enq < util:
                issue = make_issue('{enq} smaller than {util}')

            # Running on FastRamp kernels:
            elif self.fast_ramp:

                # ewma stable, down and up
                if enq > ewma:
                    issue = make_issue('{enq} bigger than {ewma}')

            # Running on (legacy) non FastRamp kernels:
            else:
                if not phase.properties['meta']['from_test']:
                    continue

                # ewma stable
                if phase.id.startswith('test/stable'):
                    if enq < ewma:
                        issue = make_issue('stable: {enq} smaller than {ewma}')

                # ewma ramping down
                elif phase.id.startswith('test/ramp_down'):
                    if enq > ewma:
                        issue = make_issue(
                            'ramp down: {enq} bigger than {ewma}')

                # ewma ramping up
                elif phase.id.startswith('test/ramp_up'):
                    if enq < ewma:
                        issue = make_issue(
                            'ramp up: {enq} smaller than {ewma}')

            metrics[idx] = ActivationSignals(activation, util, enq, ewma,
                                             issue)

        failures = [(idx, activation_signals)
                    for idx, activation_signals in metrics.items()
                    if activation_signals.issue]

        bundle = ResultBundle.from_bool(not failures)
        bundle.add_metric("failures",
                          sorted(idx for idx, activation in failures))
        bundle.add_metric("activations", metrics)

        failures_time = [activation.time for idx, activation in failures]
        self._plot_signals(task, 'activations', failures_time)
        return bundle
Example 17
    def test_means(self) -> ResultBundle:
        """
        Test signals are properly "dominated".

        The mean of `enqueued` is expected to be greater than or equal to
        the mean of `util`, since `util` is subject to decays while
        `enqueued` is not.

        On fast-ramp systems, the `ewma` signal is never smaller than
        `enqueued`, so its mean is expected to be bigger.

        On non fast-ramp systems instead, `ewma` is expected to be
        smaller than `enqueued` in ramp-up phases, and bigger in
        ramp-down phases.

        Those conditions are checked on a single execution of a task which has
        three main behaviours:

            * STABLE: periodic big task running for a relatively long period to
              ensure `util` saturation.
            * DOWN: periodic ramp-down task, to slowly decay `util`
            * UP: periodic ramp-up task, to slowly increase `util`

        """
        failure_reasons = {}
        metrics = {}

        task = self.rtapp_task_ids_map['test'][0]

        ue_df = self.trace.df_event('sched_util_est_se')
        ue_df = df_filter_task_ids(ue_df, [task])
        ua_df = self.trace.ana.load_tracking.df_task_signal(task, 'util')

        failures = []
        for phase in self.trace.ana.rta.task_phase_windows(
                task, wlgen_profile=self.rtapp_profile):
            if not phase.properties['meta']['from_test']:
                continue

            apply_phase_window = functools.partial(df_refit_index,
                                                   window=(phase.start,
                                                           phase.end))

            ue_phase_df = apply_phase_window(ue_df)
            mean_enqueued = series_mean(ue_phase_df['enqueued'])
            mean_ewma = series_mean(ue_phase_df['ewma'])

            ua_phase_df = apply_phase_window(ua_df)
            mean_util = series_mean(ua_phase_df['util'])

            def make_issue(msg):
                return msg.format(
                    util=f'util={mean_util}',
                    enq=f'enqueued={mean_enqueued}',
                    ewma=f'ewma={mean_ewma}',
                )

            issue = None
            if mean_enqueued < mean_util:
                issue = make_issue('{enq} smaller than {util}')

            # Running on FastRamp kernels:
            elif self.fast_ramp:

                # STABLE, DOWN and UP:
                if mean_ewma < mean_enqueued:
                    issue = make_issue(
                        'no fast ramp: {ewma} smaller than {enq}')

            # Running on (legacy) non FastRamp kernels:
            else:

                # STABLE: ewma ramping up
                if phase.id.startswith('test/stable'):
                    if mean_ewma > mean_enqueued:
                        issue = make_issue(
                            'fast ramp, stable: {ewma} bigger than {enq}')

                # DOWN: ewma ramping down
                elif phase.id.startswith('test/ramp_down'):
                    if mean_ewma < mean_enqueued:
                        issue = make_issue(
                            'fast ramp, down: {ewma} smaller than {enq}')

                # UP: ewma ramping up
                elif phase.id.startswith('test/ramp_up'):
                    if mean_ewma > mean_enqueued:
                        issue = make_issue(
                            'fast ramp, up: {ewma} bigger than {enq}')

            metrics[phase.id] = PhaseStats(phase.start, phase.end, mean_util,
                                           mean_enqueued, mean_ewma, issue)

        failures = [(phase, stat) for phase, stat in metrics.items()
                    if stat.issue]

        # Plot signals to support debugging analysis
        self._plot_signals(task, 'means',
                           sorted(stat.start for phase, stat in failures))

        bundle = ResultBundle.from_bool(not failures)
        bundle.add_metric("fast ramp", self.fast_ramp)
        bundle.add_metric("phases", metrics)
        bundle.add_metric("failures",
                          sorted(phase for phase, stat in failures))
        return bundle
Example 18
    def get_simulated_pelt(self, task, signal_name):
        """
        Simulate a PELT signal for a given task.

        :param task: task to look for in the trace.
        :type task: int or str or tuple(int, str)

        :param signal_name: Name of the PELT signal to simulate.
        :type signal_name: str

        :return: A :class:`pandas.DataFrame` with a ``simulated`` column
            containing the simulated signal, along with the column of the
            signal as found in the trace.
        """
        logger = self.get_logger()
        trace = self.trace
        task = trace.get_task_id(task)
        cpus = trace.analysis.tasks.cpus_of_tasks([task])

        df_activation = trace.analysis.tasks.df_task_activation(
            task,
            # Util only takes into account times where the task is actually
            # executing
            preempted_value=0,
        )
        df = trace.analysis.load_tracking.df_tasks_signal(signal_name)
        df = df_filter_task_ids(df, [task])
        df = df.copy(deep=False)

        # Ignore the first activation, as its signals are incorrect
        df_activation = df_activation.iloc[2:]

        # Make sure the activation df does not start before the dataframe of
        # signal values, otherwise we cannot provide a sensible init value
        df_activation = df_activation[df.index[0]:]

        # Get the initial signal value matching the first activation we will care about
        init_iloc = df.index.get_loc(df_activation.index[0], method='ffill')
        init = df[signal_name].iloc[init_iloc]

        try:
            # PELT clock: update_time is in nanoseconds, convert to seconds
            clock = df['update_time'] * 1e-9
        except KeyError:
            if any(self.plat_info['cpu-capacities']['rtapp'][cpu] != UTIL_SCALE
                   for phase in self.wlgen_task.phases for cpu in phase.cpus):
                raise CannotCreateError(
                    'PELT time scaling can only be simulated when the PELT clock is available from the trace'
                )

            logger.warning(
                'PELT clock is not available, ftrace timestamp will be used at the expense of accuracy'
            )
            clock = None

        df['simulated'] = simulate_pelt(df_activation['active'],
                                        index=df.index,
                                        init=init,
                                        clock=clock)

        # Since load is now CPU invariant in recent kernel versions, we don't
        # rescale it back. To match the old behavior, this line would be
        # needed:
        #  df['simulated'] /= self.plat_info['cpu-capacities']['rtapp'][cpu] / UTIL_SCALE
        kernel_version = self.plat_info['kernel']['version']
        if (signal_name == 'load' and kernel_version.parts[:2] < (5, 1)):
            logger.warning(
                f'Load signal is assumed to be CPU invariant, which is true for recent mainline kernels, but may be wrong for {kernel_version}'
            )

        df['error'] = df[signal_name] - df['simulated']
        df = df.dropna()
        return df
Example 19
    def test_noisy_tasks(self,
                         noise_threshold_pct=None,
                         noise_threshold_ms=None):
        """
        Test that no non-rtapp ("noisy") task ran for longer than the specified thresholds

        :param noise_threshold_pct: The maximum allowed runtime for noisy tasks in
          percentage of the total rt-app execution time
        :type noise_threshold_pct: float

        :param noise_threshold_ms: The maximum allowed runtime for noisy tasks in ms
        :type noise_threshold_ms: float

        If both are specified, the smallest threshold (in seconds) will be used.
        """
        if noise_threshold_pct is None and noise_threshold_ms is None:
            raise ValueError('"{}" and "{}" cannot both be None'.format(
                "noise_threshold_pct", "noise_threshold_ms"))

        # No task can run longer than the recorded duration
        threshold_s = self.trace.time_range

        if noise_threshold_pct is not None:
            threshold_s = noise_threshold_pct * self.trace.time_range / 100

        if noise_threshold_ms is not None:
            threshold_s = min(threshold_s, noise_threshold_ms * 1e-3)

        df = self.trace.analysis.tasks.df_tasks_runtime()

        # We don't want to account for the test tasks
        ignored_ids = list(map(self.trace.get_task_id, self.rtapp_tasks))

        def compute_duration_pct(row):
            return row.runtime * 100 / self.trace.time_range

        df["runtime_pct"] = df.apply(compute_duration_pct, axis=1)
        df['pid'] = df.index

        # Figure out which PIDs to exclude from the thresholds
        for key, threshold in self.NOISE_ACCOUNTING_THRESHOLDS.items():
            # Find out which task(s) this threshold is about
            if isinstance(key, str):
                comms = [
                    comm for comm in df.comm.values if re.match(key, comm)
                ]
                task_ids = [self.trace.get_task_id(comm) for comm in comms]
            else:
                # Use update=False to let None fields propagate, as they are
                # used to indicate a "don't care" value
                task_ids = [self.trace.get_task_id(key, update=False)]

            # For those tasks, check the threshold
            ignored_ids.extend(
                task_id for task_id in task_ids if df_filter_task_ids(
                    df, [task_id]).iloc[0].runtime_pct <= threshold)

        self.get_logger().info(
            "Ignored PIDs for noise contribution: {}".format(", ".join(
                map(str, ignored_ids))))

        # Filter out unwanted tasks (rt-app tasks + thresholds)
        df_noise = df_filter_task_ids(df, ignored_ids, invert=True)

        if df_noise.empty:
            return ResultBundle.from_bool(True)

        pid = df_noise.index[0]
        comm = df_noise.comm.values[0]
        duration_s = df_noise.runtime.values[0]
        duration_pct = duration_s * 100 / self.trace.time_range

        res = ResultBundle.from_bool(duration_s < threshold_s)
        metric = {
            "pid": pid,
            "comm": comm,
            "duration (abs)": TestMetric(duration_s, "s"),
            "duration (rel)": TestMetric(duration_pct, "%")
        }
        res.add_metric("noisiest task", metric)

        return res
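A worked example of the threshold selection above, with made-up numbers (pure arithmetic, not from the source):

time_range = 10.0          # trace duration, in seconds
noise_threshold_pct = 1    # 1% of the trace -> 0.1 s
noise_threshold_ms = 50    # 50 ms -> 0.05 s

threshold_s = time_range
threshold_s = min(threshold_s, noise_threshold_pct * time_range / 100)
threshold_s = min(threshold_s, noise_threshold_ms * 1e-3)
print(threshold_s)  # 0.05: the smallest threshold wins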
Example 20
    def _df_tasks_states(self, tasks=None, return_one_df=False):
        """
        Compute tasks states for all tasks.

        :param tasks: If specified, only the states of these tasks will be yielded.
            The :class:`lisa.trace.TaskID` must have a ``pid`` field specified,
            since the task state is per-PID.
        :type tasks: list(lisa.trace.TaskID) or list(int)

        :param return_one_df: If ``True``, a single dataframe is returned with
            new extra columns. If ``False``, a generator is returned that
            yields tuples of ``(TaskID, task_df)``. Each ``task_df`` contains
            the new columns.
        :type return_one_df: bool
        """
        ######################################################
        # A) Assemble the sched_switch and sched_wakeup events
        ######################################################

        wk_df = self.trace.df_event('sched_wakeup')
        sw_df = self.trace.df_event('sched_switch')

        try:
            wkn_df = self.trace.df_event('sched_wakeup_new')
        except MissingTraceEventError:
            pass
        else:
            wk_df = pd.concat([wk_df, wkn_df])

        wk_df = wk_df[["pid", "comm", "target_cpu", "__cpu"]].copy(deep=False)
        wk_df["curr_state"] = TaskState.TASK_WAKING

        prev_sw_df = sw_df[["__cpu", "prev_pid", "prev_state",
                            "prev_comm"]].copy()
        next_sw_df = sw_df[["__cpu", "next_pid", "next_comm"]].copy()

        prev_sw_df.rename(columns={
            "prev_pid": "pid",
            "prev_state": "curr_state",
            "prev_comm": "comm",
        },
                          inplace=True)

        next_sw_df["curr_state"] = TaskState.TASK_ACTIVE
        next_sw_df.rename(columns={
            'next_pid': 'pid',
            'next_comm': 'comm'
        },
                          inplace=True)

        all_sw_df = pd.concat([prev_sw_df, next_sw_df], sort=False)

        # Integer values are preferred here, otherwise the whole column
        # is converted to float64
        all_sw_df['target_cpu'] = -1

        df = pd.concat([all_sw_df, wk_df], sort=False)
        df.sort_index(inplace=True)
        df.rename(columns={'__cpu': 'cpu'}, inplace=True)

        # Restrict the set of data we will process to a given set of tasks
        if tasks is not None:

            def resolve_task(task):
                """
                Get a TaskID for each task, and only update existing TaskID if
                they lack a PID field, since that's what we care about in that
                function.
                """
                try:
                    do_update = task.pid is None
                except AttributeError:
                    do_update = False

                return self.trace.get_task_id(task, update=do_update)

            tasks = list(map(resolve_task, tasks))
            df = df_filter_task_ids(df, tasks)

        # Return a unique dataframe with new columns added
        if return_one_df:
            df.sort_index(inplace=True)
            df.index.name = 'Time'
            df.reset_index(inplace=True)

            # Since sched_switch is split into two dataframes (next and prev),
            # we end up with duplicated indices. Avoid that by incrementing
            # them by the minimum amount possible.
            df = df_update_duplicates(df, col='Time', inplace=True)

            grouped = df.groupby('pid', observed=True, sort=False)
            new_columns = dict(
                next_state=grouped['curr_state'].shift(
                    -1, fill_value=TaskState.TASK_UNKNOWN),
                # GroupBy.transform() will run the function on each group, and
                # concatenate the resulting series to create a new column.
                # Note: We actually need transform() to chain 2 operations on
                # the group, otherwise the first operation returns a final
                # Series, and the 2nd is not applied on groups
                delta=grouped['Time'].transform(
                    lambda time: time.diff().shift(-1)),
            )
            df = df.assign(**new_columns)
            df.set_index('Time', inplace=True)

            return df

        # Return a generator yielding (TaskID, task_df) tuples
        else:

            def make_pid_df(pid_df):
                # Even though the initial dataframe contains duplicated indices due to
                # using both prev_pid and next_pid in sched_switch event, we should
                # never end up with prev_pid == next_pid, so task-specific dataframes
                # are expected to be free from duplicated timestamps.
                # assert not df.index.duplicated().any()

                # Copy the df to add new columns
                pid_df = pid_df.copy(deep=False)

                # For each PID, add the time it spent in each state
                pid_df['delta'] = pid_df.index.to_series().diff().shift(-1)
                pid_df['next_state'] = pid_df['curr_state'].shift(
                    -1, fill_value=TaskState.TASK_UNKNOWN)
                return pid_df

            signals = df_split_signals(df, ['pid'])
            return ((TaskID(pid=col['pid'], comm=None), make_pid_df(pid_df))
                    for col, pid_df in signals)
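A sketch of consuming the generator form of this helper (the analysis instance and its setup are assumed):

# Hypothetical usage: iterate over per-task state dataframes
for task_id, task_df in analysis._df_tasks_states():
    active = task_df[task_df['curr_state'] == TaskState.TASK_ACTIVE]
    print(task_id, active['delta'].sum())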
Example 21
    def test_activations(self) -> ResultBundle:
        """
        Test signals are properly "aggregated" at enqueue/dequeue time.

        On fast-ramp systems, `enqueued` is expected to always be
        smaller than `ewma`.

        On non fast-ramp systems, `enqueued` is expected to be
        smaller than `ewma` in ramp-down phases, and bigger in ramp-up
        phases.

        Those conditions are checked on a single execution of a task which has
        three main behaviours:

            * STABLE: periodic big task running for a relatively long period to
              ensure `util` saturation.
            * DOWN: periodic ramp-down task, to slowly decay `util`
            * UP: periodic ramp-up task, to slowly increase `util`

        """
        metrics = {}

        task = self.rtapp_task_ids_map['test'][0]

        # Get list of task's activations
        df = self.trace.analysis.tasks.df_task_states(task)
        activations = df[(df.curr_state == TaskState.TASK_WAKING)
                         & (df.next_state == TaskState.TASK_ACTIVE)].index

        # Check task signals at each activation
        df = self.trace.df_event('sched_util_est_se')
        df = df_filter_task_ids(df, [task])

        for idx, activation in enumerate(activations):
            # Get the value of signals at their first update after the activation
            row = df_window(df, (activation, None), method='post').iloc[0]
            util = row['util']
            enq = row['enqueued']
            ewma = row['ewma']

            def make_issue(msg):
                return msg.format(
                    util='util={}'.format(util),
                    enq='enqueued={}'.format(enq),
                    ewma='ewma={}'.format(ewma),
                )

            issue = None

            # UtilEst is not updated when within 1% of previous activation
            if 1.01 * enq < util:
                issue = make_issue('{enq} smaller than {util}')

            # Running on FastRamp kernels:
            elif self.fast_ramp:

                # ewma stable, down and up
                if enq > ewma:
                    issue = make_issue('{enq} bigger than {ewma}')

            # Running on (legacy) non FastRamp kernels:
            else:

                phase = self.trace.analysis.rta.task_phase_at(task, activation)
                # TODO: remove that once we have named phases to skip the buffer phase
                if phase.id == 0:
                    continue

                # ewma stable
                if phase.id == 1 and enq < ewma:
                    issue = make_issue('stable: {enq} smaller than {ewma}')

                # ewma ramping down
                elif phase.id <= 5 and enq > ewma:
                    issue = make_issue('ramp down: {enq} bigger than {ewma}')

                # ewma ramping up
                elif phase.id >= 6 and enq < ewma:
                    issue = make_issue('ramp up: {enq} smaller than {ewma}')

            metrics[idx] = ActivationSignals(activation, util, enq, ewma,
                                             issue)

        failures = [(idx, activation_signals)
                    for idx, activation_signals in metrics.items()
                    if activation_signals.issue]

        bundle = ResultBundle.from_bool(not failures)
        bundle.add_metric("failures",
                          sorted(idx for idx, activation in failures))
        bundle.add_metric("activations", metrics)

        failures_time = [activation.time for idx, activation in failures]
        self._plot_signals(task, 'activations', failures_time)
        return bundle
Example 22
    # Collect a trace while running the workload (the enclosing collector
    # context is truncated in the original; reconstructed here)
    with ftrace_coll:
        wload.run()

ftrace_coll.get_trace(trace_path)
trace = Trace(trace_path, target.plat_info, events=["sched_switch"])

# sched_switch __comm  __pid  __cpu  __line prev_comm  prev_pid  prev_prio  prev_state next_comm  next_pid  next_prio
df = trace.df_events('sched_switch')[['next_pid', 'next_comm', '__cpu']]


def analyze_task_migration(task_id, ddf):
    start = ddf.index[0]
    stop = min(ddf.index[1] + 1.0, df.index[-1])
    start_cpu = ddf['__cpu'].values[0]
    stop_cpu = ddf['__cpu'].values[1]
    _df = df[start:stop][df[start:stop]['__cpu'] == start_cpu]
    print("Task {} migrated from CPU {} to CPU {}\n".format(
        task_id, start_cpu, stop_cpu))
    print(_df.to_string(max_cols=64) + "\n")


for task in tasks:
    task_id = trace.get_task_id(task, update=False)
    _df = df_filter_task_ids(df, [task_id],
                             pid_col='next_pid',
                             comm_col='next_comm')
    ddf = _df.drop_duplicates(subset='__cpu', keep='first', inplace=False)
    print("******************  sched_switch {} ********************\n {} \n".
          format(task, ddf.to_string(max_cols=64)))
    if len(ddf.index) > 1:
        analyze_task_migration(task_id, ddf)