Python df_deduplicateの例、lisa.datautils.df_deduplicate Pythonの例

コード例 #1

0

ファイルを表示

ファイル: eas_behaviour.py プロジェクト: douglas-raillard-arm/lisa

    def _get_task_cpu_df(self):
        """
        Get a DataFrame mapping task names to the CPU they ran on

        For each entry of ``self.rtapp_task_ids_map``, the first task ID is
        looked up with ``self.trace.ana.tasks.df_task_activation()`` and its
        ``cpu`` column is used. Does not reflect idleness - tasks not running
        are shown as running on the last CPU they woke on.

        :returns: A Pandas DataFrame with a column for each task (named after
                  the task's ``comm``), showing the CPU that the task was "on"
                  at each moment in time
        """
        def task_cpu(task):
            return task.comm, self.trace.ana.tasks.df_task_activation(
                task=task)['cpu']

        # Only the task IDs are needed, so iterate over the values of the map
        # rather than unpacking an unused key.
        df = pd.DataFrame(dict(
            task_cpu(task_ids[0])
            for task_ids in self.rtapp_task_ids_map.values()
        ))

        # Each task only has samples at its own timestamps: propagate the last
        # known CPU forward. ``ffill()`` replaces the deprecated
        # ``fillna(method='ffill')`` spelling.
        df = df.ffill()
        # Drop the leading rows where at least one task has no known CPU yet
        df = df.dropna()
        df = df_deduplicate(df, consecutives=True, keep='first')

        # Ensure the index is refitted so that integrals work as expected
        df = df_refit_index(df, window=self.trace.window)
        return df

コード例 #2

0

ファイルを表示

ファイル: tasks.py プロジェクト: ambroise-arm/lisa

    def df_task_activation(self, task, cpu=None, active_value=1, sleep_value=0):
        """
        DataFrame of a task's active time on a given CPU

        :param task: the task to report activations of
        :type task: int or str or tuple(int, str)

        :param cpu: the CPUs to look at. If ``None``, all CPUs will be used.
        :type cpu: int or None

        :param active_value: the value to use in the series when task is
            active.
        :type active_value: float

        :param sleep_value: the value to use in the series when task is
            sleeping.
        :type sleep_value: float

        :returns: a :class:`pandas.DataFrame` with:

          * A timestamp as index
          * A ``active`` column, containing ``active_value`` when the task is
            not sleeping, ``sleep_value`` otherwise.
          * A ``cpu`` column with the CPU the task was running on.
          * A ``duration`` column containing the duration of the current sleep or activation.
          * A ``duty_cycle`` column containing the duty cycle in ``[0...1]`` of
            the task, updated at each pair of activation and sleep.
        """

        df = self.df_task_states(task)

        def f(state):
            if state == TaskState.TASK_ACTIVE:
                return active_value
            else:
                return sleep_value

        if cpu is not None:
            df = df[df['cpu'] == cpu]

        # Work on a private copy: without it, the column assignment below
        # would either modify the dataframe handed back by df_task_states()
        # (cpu is None) or write into a slice of it (cpu filter applied).
        df = df.copy()

        df['active'] = df['curr_state'].map(f)
        df = df[['active', 'cpu']]

        # Only keep first occurrence of each adjacent duplicates, since we get
        # events when the signal changes
        df = df_deduplicate(df, consecutives=True, keep='first')

        # Once we removed the duplicates, we can compute the time spent while
        # sleeping or activating: each row lasts until the next row's timestamp
        df['duration'] = df.index.to_series().diff().shift(-1)

        sleep = df[df['active'] == sleep_value]['duration']
        active = df[df['active'] == active_value]['duration']
        # Pair an activation time with its following sleep time
        active = active.reindex_like(sleep, method='ffill')

        # The assignment aligns the ratio on df's index, leaving NaN on rows
        # that are not sleep rows.
        df['duty_cycle'] = active / (active + sleep)
        # Assign the result back instead of calling fillna(inplace=True) on
        # the column selection, which is not guaranteed to update ``df``.
        df['duty_cycle'] = df['duty_cycle'].ffill().shift(-1)

        return df

コード例 #3

0

ファイルを表示

    def _get_task_cpu_df(self, task_id):
        """
        Build the migration dataframe of a single task.

        The task states dataframe of ``task_id`` is reduced so that, for each
        run of consecutive rows sharing the same ``cpu`` value, only the first
        row is kept. Each remaining row therefore marks a change of CPU.

        :param task_id: the task to look up, passed as-is to
            ``df_task_states()``

        :returns: A Pandas DataFrame for the task, with one row per CPU the
                  task was migrated to
        """
        states_df = self.trace.analysis.tasks.df_task_states(task_id)
        return df_deduplicate(
            states_df,
            cols=['cpu'],
            consecutives=True,
            keep='first',
        )

コード例 #4

0

ファイルを表示

    def df_overutilized(self):
        """
        Dataframe of the ``sched_overutilized`` events.

        :returns: A :class:`pandas.DataFrame` with:

          * A ``overutilized`` column (the overutilized status at a given time)
          * A ``len`` column (the time spent in that overutilized status)
        """
        trace = self.trace

        # Each event opens a "band" of overutilization status; its length is
        # stored in the "len" column
        events = trace.df_event('sched_overutilized')
        bands = df_add_delta(events, col='len', window=trace.window)

        # The trailing row has a NaN len, so it is left out
        bands = bands.iloc[:-1]

        # Collapse runs of consecutive duplicated rows, keeping the last one
        bands = df_deduplicate(bands, keep='last', consecutives=True)

        return bands[['len', 'overutilized']]

コード例 #5

0

ファイルを表示

    def df_overutilized(self):
        """
        Dataframe of the ``sched_overutilized`` events.

        :returns: A :class:`pandas.DataFrame` with:

          * A ``overutilized`` column (the overutilized status at a given time)
          * A ``len`` column (the time spent in that overutilized status)
        """
        trace = self.trace
        events = trace.df_event('sched_overutilized')

        # Several CPUs may race to emit the sched_overutilized event. Only the
        # first row of each run of identical "overutilized" values is kept.
        status_changes = df_deduplicate(
            events,
            cols=['overutilized'],
            consecutives=True,
            keep='first',
        )

        # Length of each overutilization "band", stored in the "len" column
        bands = df_add_delta(status_changes, col='len', window=trace.window)

        # The trailing row has a NaN len, so it is left out
        return bands.iloc[:-1][['len', 'overutilized']]

コード例 #6

0

ファイルを表示

ファイル: eas_behaviour.py プロジェクト: ambroise-arm/lisa

    def _get_task_cpu_df(self):
        """
        Get a DataFrame mapping task names to the CPU they ran on

        Use the sched_switch trace event to find which CPU each task ran
        on. Does not reflect idleness - tasks not running are shown as running
        on the last CPU they woke on.

        :returns: A Pandas DataFrame with a column for each task, showing the
                  CPU that the task was "on" at each moment in time
        """
        tasks = self.rtapp_tasks

        df = self.trace.df_events('sched_switch')[['next_comm', '__cpu']]
        # Only keep the events switching in one of the rt-app tasks
        df = df[df['next_comm'].isin(tasks)]

        # Turn each task name into its own column. When ``index`` is omitted,
        # pivot() uses the existing index, which is what passing
        # ``index=df.index`` was meant to achieve; recent pandas versions
        # interpret ``index`` as column labels and would reject index values.
        # ``ffill()`` replaces the deprecated ``fillna(method='ffill')``.
        df = df.pivot(columns='next_comm').ffill()

        # Columns are now (value, task) pairs: select the per-task CPU columns
        cpu_df = df['__cpu']
        cpu_df = df_deduplicate(cpu_df, keep='first', consecutives=True)
        # Only keep the rows where at least one task differs from the
        # previous row
        cpu_df = cpu_df[(cpu_df.shift(+1) != cpu_df).any(axis=1)]
        return cpu_df

コード例 #7

0

ファイルを表示

ファイル: tasks.py プロジェクト: Smilence902/lisa

    def df_task_activation(self,
                           task,
                           cpu=None,
                           active_value=1,
                           sleep_value=0,
                           preempted_value=np.nan):
        """
        DataFrame of a task's active time on a given CPU

        :param task: the task to report activations of
        :type task: int or str or tuple(int, str)

        :param cpu: the CPUs to look at. If ``None``, all CPUs will be used.
        :type cpu: int or None

        :param active_value: the value to use in the series when task is
            active.
        :type active_value: float

        :param sleep_value: the value to use in the series when task is
            sleeping.
        :type sleep_value: float

        :param preempted_value: the value to use in the series when task is
            preempted (runnable but not actually executing).
        :type preempted_value: float

        :returns: a :class:`pandas.DataFrame` with:

          * A timestamp as index
          * A ``active`` column, containing ``active_value`` when the task is
            running, ``sleep_value`` when sleeping, and ``preempted_value``
            otherwise.
          * A ``cpu`` column with the CPU the task was running on.
          * A ``duration`` column containing the duration of the current sleep or activation.
          * A ``duty_cycle`` column containing the duty cycle in ``[0...1]`` of
            the task, updated at each pair of activation and sleep.
        """

        df = self.df_task_states(task)

        def f(state):
            if state == TaskState.TASK_ACTIVE:
                return active_value
            # TASK_RUNNING happens when a task is preempted (so it's not
            # TASK_ACTIVE anymore but still runnable)
            elif state == TaskState.TASK_RUNNING:
                # Return NaN regardless of preempted_value, since some below
                # code relies on that
                return np.nan
            else:
                return sleep_value

        if cpu is not None:
            df = df[df['cpu'] == cpu]

        # TASK_WAKING can just be removed. The delta will then be computed
        # without it, which means the time spent in WAKING state will be
        # accounted into the previous state.
        df = df[df['curr_state'] != TaskState.TASK_WAKING]

        # Take the private copy after all row filtering, so the column
        # assignment below targets a standalone dataframe rather than a slice
        # of the one returned by df_task_states().
        df = df.copy()

        df['active'] = df['curr_state'].map(f)
        df = df[['active', 'cpu']]

        # Only keep first occurrence of each adjacent duplicates, since we get
        # events when the signal changes
        df = df_deduplicate(df, consecutives=True, keep='first')

        # Once we removed the duplicates, we can compute the time spent while sleeping or activating
        df_add_delta(df, col='duration', inplace=True)

        # Make a dataframe where the rows corresponding to preempted time are removed
        preempt_free_df = df.dropna().copy()

        # Merge consecutive activations' duration. They could have been
        # split in two by a bit of preemption, and we don't want that to
        # affect the duty cycle.
        df_combine_duplicates(preempt_free_df,
                              cols=['active'],
                              func=lambda df: df['duration'].sum(),
                              output_col='duration',
                              inplace=True)

        sleep = preempt_free_df[preempt_free_df['active'] ==
                                sleep_value]['duration']
        active = preempt_free_df[preempt_free_df['active'] ==
                                 active_value]['duration']
        # Pair an activation time with its following sleep time
        sleep = sleep.reindex(active.index, method='bfill')
        duty_cycle = active / (active + sleep)

        # The assignment aligns the ratio on df's index; rows that are not
        # activations get NaN and then inherit the previous duty cycle.
        # Assign the filled column back instead of using fillna(inplace=True)
        # on a column selection, which is not guaranteed to update ``df``.
        df['duty_cycle'] = duty_cycle
        df['duty_cycle'] = df['duty_cycle'].ffill()

        # The NaN placeholder used for preempted rows is only replaced once
        # all the computations relying on it are done.
        if not np.isnan(preempted_value):
            df['active'] = df['active'].fillna(preempted_value)

        return df