Example #1
    def df_cpu_idle_state_residency(self, cpu):
        """
        Compute time spent by a given CPU in each idle state.

        :param cpu: CPU ID
        :type cpu: int

        :returns: a :class:`pandas.DataFrame` with:

          * Idle states as index
          * A ``time`` column (the time spent in that idle state)
        """
        idle_df = self.df_cpu_idle(cpu)

        # Ensure accurate time-based sum of state deltas
        idle_df = df_refit_index(idle_df, window=self.trace.window)

        # For each state, sum the time spent in it
        idle_df = df_add_delta(idle_df)

        residency = {
            cols['state']: state_df['delta'].sum()
            for cols, state_df in df_split_signals(idle_df, ['state'])
        }
        df = pd.DataFrame.from_dict(residency,
                                    orient='index',
                                    columns=['time'])
        df.index.name = 'idle_state'
        return df
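
The residency computation above relies on df_split_signals() yielding one sub-dataframe per value of the state column. Outside of LISA, the same pattern can be reproduced with a plain pandas groupby; a minimal sketch on toy data, assuming the delta column has already been computed (as df_add_delta() does above):

import pandas as pd

# Toy stand-in for idle_df: a 'state' column plus a precomputed
# 'delta' column (time until the next event).
idle_df = pd.DataFrame(
    {'state': [0, 1, 0, 2], 'delta': [0.5, 1.0, 0.25, 2.0]},
    index=[0.0, 0.5, 1.5, 1.75],
)

# Equivalent of the dict comprehension above: group the rows by state
# and sum the time spent in each state.
residency = {
    state: state_df['delta'].sum()
    for state, state_df in idle_df.groupby('state')
}

df = pd.DataFrame.from_dict(residency, orient='index', columns=['time'])
df.index.name = 'idle_state'
print(df)
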
Example #2
File: rta.py Project: ambroise-arm/lisa
    def df_phases(self, task):
        """
        Get the actual start times and durations of each phase

        :param task: the rt-app task to filter for
        :type task: int or str or lisa.trace.TaskID

        :returns: A :class:`pandas.DataFrame` with an index representing the
            start time of each phase, and these columns:

                * ``phase``: the phase number.
                * ``duration``: the measured phase duration.
        """
        def get_duration(phase, df):
            start = df.index[0]
            end = df.index[-1]
            duration = end - start
            return (start, {'phase': phase, 'duration': duration})

        loops_df = self.df_rtapp_loop(task)
        durations = sorted(
            get_duration(cols['phase'], df)
            for cols, df in df_split_signals(loops_df, ['phase']))

        index, columns = zip(*durations)
        return pd.DataFrame(columns, index=index)
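
The returned frame is assembled from the (start, info) tuples produced by get_duration(); the construction is plain pandas and can be checked in isolation with toy values:

import pandas as pd

# get_duration() returns (start, {'phase': ..., 'duration': ...})
# tuples: zip(*...) separates the starts from the info dicts, and
# pandas turns a sequence of dicts into columns.
durations = [
    (0.0, {'phase': 0, 'duration': 1.0}),
    (1.0, {'phase': 1, 'duration': 0.5}),
]
index, columns = zip(*durations)
df = pd.DataFrame(columns, index=index)
print(df)
#      phase  duration
# 0.0      0       1.0
# 1.0      1       0.5
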
Example #3
    def test_df_split_signals(self):
        index = list(map(float, range(1, 6)))
        cols = ["foo", "bar"]
        ncols = len(cols)

        data = [(i, i % 2) for i in range(len(index))]

        df = pd.DataFrame(index=index, data=data, columns=cols)

        for ident, subdf in du.df_split_signals(df, ["bar"]):
            if ident["bar"] == 0:
                assert len(subdf) == 3
            else:
                assert len(subdf) == 2
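
The test pins down the contract of df_split_signals(): one (ident, sub_df) pair per unique combination of values in the requested columns. A plain-pandas approximation (a hypothetical helper, not the LISA implementation) behaves the same way on the test data:

import pandas as pd

def split_signals(df, signal_cols):
    # Yield ({col: value}, sub_df) pairs, one per unique combination
    # of values in signal_cols.
    for values, sub_df in df.groupby(signal_cols):
        # Depending on the pandas version, groupby() may return a
        # scalar key for a single column; normalize to a tuple.
        if not isinstance(values, tuple):
            values = (values,)
        yield dict(zip(signal_cols, values)), sub_df

index = list(map(float, range(1, 6)))
df = pd.DataFrame(
    index=index,
    data=[(i, i % 2) for i in range(len(index))],
    columns=["foo", "bar"],
)

for ident, subdf in split_signals(df, ["bar"]):
    print(ident, len(subdf))
# {'bar': 0} 3
# {'bar': 1} 2
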
Example #4
File: rta.py Project: qais-yousef/lisa
    def df_phases(self, task):
        """
        Get the actual start times and durations of each phase

        :param task: the rt-app task to filter for
        :type task: int or str or lisa.trace.TaskID

        :returns: A :class:`pandas.DataFrame` with an index representing the
            start time of each phase, and these columns:

                * ``phase``: the phase number.
                * ``duration``: the measured phase duration.
        """

        # Trace windowing can cut the trace anywhere, so we need to remove the
        # partial loop records to avoid confusion
        def filter_partial_loop(df):
            # Get rid of the end of a previous loop
            if not df.empty and df['event'].iloc[0] == 'end':
                df = df.iloc[1:]

            # Get rid of the beginning of a new loop at the end
            if not df.empty and df['event'].iloc[-1] == 'start':
                df = df.iloc[:-1]

            return df

        def get_duration(phase, df):
            start = df.index[0]
            end = df.index[-1]
            duration = end - start
            return (start, {'phase': phase, 'duration': duration})

        loops_df = self.df_rtapp_loop(task)

        phases_df_list = [
            (cols['phase'], filter_partial_loop(df))
            for cols, df in df_split_signals(loops_df, ['phase'])
        ]
        durations = sorted(
            get_duration(phase, df) for phase, df in phases_df_list
            if not df.empty)

        if durations:
            index, columns = zip(*durations)
            return pd.DataFrame(columns, index=index)
        else:
            return pd.DataFrame()
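
The effect of filter_partial_loop() is easy to see on a hand-built frame where windowing clipped both ends of a loop (toy timestamps):

import pandas as pd

# Windowing left a dangling 'end' from a previous loop at the start,
# and a dangling 'start' of a new loop at the end.
df = pd.DataFrame(
    {'event': ['end', 'start', 'end', 'start']},
    index=[0.1, 0.2, 1.2, 1.3],
)

# Same trimming as filter_partial_loop() above
if not df.empty and df['event'].iloc[0] == 'end':
    df = df.iloc[1:]
if not df.empty and df['event'].iloc[-1] == 'start':
    df = df.iloc[:-1]

print(df)  # only the complete loop (start at 0.2, end at 1.2) remains
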
Example #5
    def plot_task_placement(self, task: TaskID):
        """
        Plot the CPU placement of the task

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple
        """
        task_id = self.trace.get_task_id(task, update=False)

        # Get all utilization update events
        df = self.df_task_signal(task_id, 'required_capacity').copy()
        cpu_capacities = self.trace.plat_info["cpu-capacities"]['orig']

        df['capacity'] = df['cpu'].map(cpu_capacities)
        nr_cpus = df['cpu'].max() + 1

        def add_placement(df, comp, comp_str):
            placement = f"CPU capacity {comp_str} required capacity"
            condition = comp(df['capacity'], df['required_capacity'])
            df.loc[condition, 'placement'] = placement

        add_placement(df, operator.lt, '<')
        add_placement(df, operator.gt, '>')
        add_placement(df, operator.eq, '==')

        def plot_placement(cols, placement_df):
            placement = cols['placement']
            series = df["cpu"]
            series = series_refit_index(series, window=self.trace.window)
            series.name = 'cpu'
            return hv.Scatter(
                series,
                label=placement,
            ).options(
                marker='+',
                yticks=list(range(nr_cpus)),
            )

        return hv.Overlay(
            list(
                itertools.starmap(plot_placement,
                                  df_split_signals(df, ['placement']))) +
            [self._plot_overutilized()]).options(
                title=f'Utilization vs placement of task {task}',
                ylabel='CPU',
            )
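
The placement labelling used by both plotting variants (this example and the next) is ordinary boolean indexing; the same add_placement() logic works on any frame with capacity and required_capacity columns, as the toy values below show:

import operator

import pandas as pd

df = pd.DataFrame({
    'capacity': [512, 1024, 512],
    'required_capacity': [512, 300, 800],
})

def add_placement(df, comp, comp_str):
    # Label the rows where the comparison between CPU capacity and
    # required capacity holds
    placement = f"CPU capacity {comp_str} required capacity"
    condition = comp(df['capacity'], df['required_capacity'])
    df.loc[condition, 'placement'] = placement

add_placement(df, operator.lt, '<')
add_placement(df, operator.gt, '>')
add_placement(df, operator.eq, '==')
print(df['placement'])  # row 0: '==', row 1: '>', row 2: '<'
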
Example #6
    def plot_task_placement(self, task: TaskID, axis, local_fig):
        """
        Plot the CPU placement of the task

        :param task: The name or PID of the task, or a tuple ``(pid, comm)``
        :type task: str or int or tuple
        """
        task_id = self.trace.get_task_id(task, update=False)

        # Get all utilization update events
        df = self.df_task_signal(task_id, 'required_capacity').copy()
        cpu_capacities = self.trace.plat_info["cpu-capacities"]['orig']

        df['capacity'] = df['cpu'].map(cpu_capacities)

        def add_placement(df, comp, comp_str):
            placement = "CPU capacity {} required capacity".format(comp_str)
            condition = comp(df['capacity'], df['required_capacity'])
            df.loc[condition, 'placement'] = placement

        add_placement(df, operator.lt, '<')
        add_placement(df, operator.gt, '>')
        add_placement(df, operator.eq, '==')

        for cols, placement_df in df_split_signals(df, ['placement']):
            placement = cols['placement']
            series = df["cpu"]
            series = series_refit_index(series, window=self.trace.window)
            series.plot(ax=axis, style="+", label=placement)

        plot_overutilized = self.trace.analysis.status.plot_overutilized
        if self.trace.has_events(plot_overutilized.used_events):
            plot_overutilized(axis=axis)

        if local_fig:
            axis.set_title(
                'Utilization vs placement of task "{}"'.format(task))

            axis.grid(True)
            axis.legend()
Example #7
    def get_expected_cpu_util(self):
        """
        Get the per-phase average CPU utilization expected from the duty cycle
        of the tasks found in the trace.

        :returns: A dict of the shape ``{cpu: {phase_id: expected_util}}``

        .. note:: This is more robust than just looking at the duty cycle in
            the task profile, since rtapp might not accurately reproduce the
            duty cycle it was asked for.
        """
        cpu_capacities = self.plat_info['cpu-capacities']['rtapp']
        cpu_util = {}
        cpu_freqs = self.plat_info['freqs']

        try:
            freq_df = self.trace.analysis.frequency.df_cpus_frequency()
        except MissingTraceEventError:
            cpus_rel_freq = None
        else:
            cpus_rel_freq = {
                # Frequency, normalized according to max frequency on that CPU
                cols['cpu']: df['frequency'] / max(cpu_freqs[cols['cpu']])
                for cols, df in df_split_signals(freq_df, ['cpu'])
            }

        for task in self.rtapp_task_ids:
            df = self.trace.analysis.tasks.df_task_activation(task)

            for row in self.trace.analysis.rta.df_phases(task).itertuples():
                phase = row.phase
                duration = row.duration
                start = row.Index
                end = start + duration
                # Ignoring the first quarter of the util signal of each phase
                # (it is impacted by the phase change, and rtapp does some
                # bookkeeping at the beginning of phases) is currently
                # disabled:
                # start += duration / 4

                # Readjust the duration to take into account any modification
                # of start
                duration = end - start
                window = (start, end)
                phase_df = df_window(df, window, clip_window=True)

                for cpu in self.cpus:

                    if cpus_rel_freq is None:
                        rel_freq_mean = 1
                    else:
                        phase_freq_series = df_window(cpus_rel_freq[cpu],
                                                      window=window,
                                                      clip_window=True)
                        # We might not have frequency data at the beginning
                        # of the trace, or if no frequency transition happened
                        # at all.
                        if phase_freq_series.empty:
                            rel_freq_mean = 1
                        else:
                            # If we lack freq data at the beginning of the
                            # window, assume the frequency was right.
                            if phase_freq_series.index[0] > start:
                                phase_freq_series = pd.concat([
                                    pd.Series([1.0], index=[start]),
                                    phase_freq_series
                                ])

                            # Extend the frequency to the right so that the mean
                            # takes into account all the data we have
                            freq_window = (phase_freq_series.index[0], end)
                            rel_freq_mean = series_mean(
                                series_refit_index(phase_freq_series,
                                                   window=freq_window))

                    cpu_phase_df = phase_df[phase_df['cpu'] == cpu].dropna()
                    if cpu_phase_df.empty:
                        duty_cycle = 0
                        cpu_residency = 0
                    else:
                        duty_cycle = series_mean(
                            df_refit_index(cpu_phase_df['duty_cycle'],
                                           window=window))
                        cpu_residency = end - max(cpu_phase_df.index[0], start)

                    phase_util = UTIL_SCALE * duty_cycle * (
                        cpu_capacities[cpu] / UTIL_SCALE)
                    # Pro-rate by the time spent on that CPU, so we get
                    # the correct average.
                    phase_util *= cpu_residency / duration

                    # We might not have run at max freq, e.g. because of
                    # thermal capping, so take that into account
                    phase_util *= rel_freq_mean

                    cpu_util.setdefault(cpu, {}).setdefault(phase, 0)
                    cpu_util[cpu][phase] += phase_util

        return cpu_util
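
The per-phase utilization estimate at the end of the loop is just a product of factors; a worked sketch with made-up numbers (UTIL_SCALE is the PELT utilization scale, 1024):

UTIL_SCALE = 1024

duty_cycle = 0.25      # mean duty cycle measured in the phase
cpu_capacity = 512     # plat_info['cpu-capacities']['rtapp'][cpu]
cpu_residency = 0.8    # seconds spent on this CPU during the phase
duration = 1.0         # phase duration in seconds
rel_freq_mean = 0.9    # mean frequency, relative to the CPU max

# Same arithmetic as above: the UTIL_SCALE factors cancel, leaving
# the duty cycle scaled by the CPU capacity ...
phase_util = UTIL_SCALE * duty_cycle * (cpu_capacity / UTIL_SCALE)
# ... pro-rated by the time actually spent on that CPU ...
phase_util *= cpu_residency / duration
# ... and corrected for running below the max frequency.
phase_util *= rel_freq_mean
print(phase_util)  # ~92.16
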
Example #8
    def df_phases(self, task, wlgen_profile=None):
        """
        Get the actual start times and durations of each phase

        :param task: the rt-app task to filter for
        :type task: int or str or lisa.trace.TaskID

        :param wlgen_profile: See :meth:`df_rtapp_loop`
        :type wlgen_profile: dict(str, lisa.wlgen.rta.RTAPhase) or None

        :returns: A :class:`pandas.DataFrame` with an index representing the
            start time of each phase, and these columns:

                * ``phase``: the phase number or its name extracted from
                  ``wlgen_profile``.
                * ``duration``: the measured phase duration.
                * ``properties``: the properties mapping of the phase extracted
                  from ``wlgen_profile``.
        """

        # Trace windowing can cut the trace anywhere, so we need to remove the
        # partial loop records to avoid confusion
        def filter_partial_loop(df):
            # Get rid of the end of a previous loop
            if not df.empty and df['event'].iloc[0] == 'end':
                df = df.iloc[1:]

            # Get rid of the beginning of a new loop at the end
            if not df.empty and df['event'].iloc[-1] == 'start':
                df = df.iloc[:-1]

            return df

        def get_duration(phase, df):
            start = df.index[0]
            end = df.index[-1]
            duration = end - start
            info = {
                'phase': phase,
                'duration': duration,
            }
            # The properties are the same all along for a given phase, so we
            # can just pick the first one
            try:
                properties = df.iloc[0]['properties']
            except KeyError:
                properties = {}
            info['properties'] = properties

            return (start, info)

        loops_df = self.df_rtapp_loop(task, wlgen_profile=wlgen_profile)

        phases_df_list = [
            (cols['phase'], filter_partial_loop(df))
            for cols, df in df_split_signals(loops_df, ['phase'])
        ]
        durations = sorted(
            get_duration(phase, df) for phase, df in phases_df_list
            if not df.empty)

        if durations:
            index, columns = zip(*durations)
            return pd.DataFrame(columns, index=index)
        else:
            return pd.DataFrame()
Example #9
File: tasks.py Project: Smilence902/lisa
    def _df_tasks_states(self, tasks=None, return_one_df=False):
        """
        Compute tasks states for all tasks.

        :param tasks: If specified, only the states of these tasks will be
            yielded. The :class:`lisa.trace.TaskID` must have a ``pid`` field
            specified, since the task state is per-PID.
        :type tasks: list(lisa.trace.TaskID) or list(int)

        :param return_one_df: If ``True``, a single dataframe is returned with
            new extra columns. If ``False``, a generator is returned that
            yields tuples of ``(TaskID, task_df)``. Each ``task_df`` contains
            the new columns.
        :type return_one_df: bool
        """
        ######################################################
        # A) Assemble the sched_switch and sched_wakeup events
        ######################################################

        wk_df = self.trace.df_event('sched_wakeup')
        sw_df = self.trace.df_event('sched_switch')

        try:
            wkn_df = self.trace.df_event('sched_wakeup_new')
        except MissingTraceEventError:
            pass
        else:
            wk_df = pd.concat([wk_df, wkn_df])

        wk_df = wk_df[["pid", "comm", "target_cpu", "__cpu"]].copy(deep=False)
        wk_df["curr_state"] = TaskState.TASK_WAKING

        prev_sw_df = sw_df[["__cpu", "prev_pid", "prev_state",
                            "prev_comm"]].copy()
        next_sw_df = sw_df[["__cpu", "next_pid", "next_comm"]].copy()

        prev_sw_df.rename(
            columns={
                "prev_pid": "pid",
                "prev_state": "curr_state",
                "prev_comm": "comm",
            },
            inplace=True,
        )

        next_sw_df["curr_state"] = TaskState.TASK_ACTIVE
        next_sw_df.rename(
            columns={
                'next_pid': 'pid',
                'next_comm': 'comm',
            },
            inplace=True,
        )

        all_sw_df = pd.concat([prev_sw_df, next_sw_df], sort=False)

        # Integer values are preferred here, otherwise the whole column
        # is converted to float64
        all_sw_df['target_cpu'] = -1

        df = pd.concat([all_sw_df, wk_df], sort=False)
        df.sort_index(inplace=True)
        df.rename(columns={'__cpu': 'cpu'}, inplace=True)

        # Restrict the set of data we will process to a given set of tasks
        if tasks is not None:

            def resolve_task(task):
                """
                Get a TaskID for each task, and only update existing TaskID if
                they lack a PID field, since that's what we care about in that
                function.
                """
                try:
                    do_update = task.pid is None
                except AttributeError:
                    do_update = False

                return self.trace.get_task_id(task, update=do_update)

            tasks = list(map(resolve_task, tasks))
            df = df_filter_task_ids(df, tasks)

        # Return a unique dataframe with new columns added
        if return_one_df:
            df.sort_index(inplace=True)
            df.index.name = 'Time'
            df.reset_index(inplace=True)

            # Since sched_switch is split into two dataframes (next and
            # prev), we end up with duplicated indices. Avoid that by
            # incrementing them by the minimum amount possible.
            df = df_update_duplicates(df, col='Time', inplace=True)

            grouped = df.groupby('pid', observed=True, sort=False)
            new_columns = dict(
                next_state=grouped['curr_state'].shift(
                    -1, fill_value=TaskState.TASK_UNKNOWN),
                # GroupBy.transform() will run the function on each group, and
                # concatenate the resulting series to create a new column.
                # Note: We actually need transform() to chain 2 operations on
                # the group, otherwise the first operation returns a final
                # Series, and the 2nd is not applied on groups
                delta=grouped['Time'].transform(
                    lambda time: time.diff().shift(-1)),
            )
            df = df.assign(**new_columns)
            df.set_index('Time', inplace=True)

            return df

        # Return a generator yielding (TaskID, task_df) tuples
        else:

            def make_pid_df(pid_df):
                # Even though the initial dataframe contains duplicated
                # indices (since both prev_pid and next_pid of each
                # sched_switch event are used), we should never end up with
                # prev_pid == next_pid, so task-specific dataframes are
                # expected to be free from duplicated timestamps.
                # assert not df.index.duplicated().any()

                # Copy the df to add new columns
                pid_df = pid_df.copy(deep=False)

                # For each PID, add the time it spent in each state
                pid_df['delta'] = pid_df.index.to_series().diff().shift(-1)
                pid_df['next_state'] = pid_df['curr_state'].shift(
                    -1, fill_value=TaskState.TASK_UNKNOWN)
                return pid_df

            signals = df_split_signals(df, ['pid'])
            return ((TaskID(pid=col['pid'], comm=None), make_pid_df(pid_df))
                    for col, pid_df in signals)
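
The per-task columns computed by make_pid_df() follow a common shift/diff pattern, shown here on a toy frame (the state values stand in for TaskState members):

import pandas as pd

TASK_UNKNOWN = -1  # stand-in for TaskState.TASK_UNKNOWN

pid_df = pd.DataFrame(
    {'curr_state': [2, 0, 2]},  # e.g. WAKING, ACTIVE, WAKING
    index=[0.0, 0.5, 1.25],     # timestamps
)

# Time spent in each state: distance to the next row's timestamp
pid_df['delta'] = pid_df.index.to_series().diff().shift(-1)
# State that follows the current one; unknown for the last row
pid_df['next_state'] = pid_df['curr_state'].shift(-1, fill_value=TASK_UNKNOWN)
print(pid_df)
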
Example #10
    def _df_group_apply(self, df, func, melt=False, index_cols=None):
        """
        Apply ``func`` on subsets of the dataframe and return the concatenated
        result.

        :param df: Dataframe in database format (meaningless index, tag and
            value columns).
        :type df: pandas.DataFrame

        :param func: Callable called with 3 parameters:

            * ``ref``: Reference subgroup dataframe for comparison purposes.
            * ``df``: Dataframe of the subgroup, to compare to ``ref``.
            * ``group``: Dictionary ``dict(column_name, value)`` identifying
              the ``df`` subgroup.
        :type func: collections.abc.Callable

        :param melt: If ``True``, extra columns added by the callback in the
            returned :class:`pandas.DataFrame` will be melted, i.e. they will
            be turned into rows, with the column name copied to the stat
            column.
        :type melt: bool

        :param index_cols: Columns to aggregate on that will be used for
            indexing the sub-dataframes, instead of the default ``agg_cols``.
        :type index_cols: list(str) or None
        """
        ref_group = FrozenDict(self._ref_group)
        # All the columns that are not involved in the group itself except the
        # value will be used as index, so that the reference group and other
        # groups can be joined meaningfully on the index for comparison
        # purposes.
        index_cols = index_cols if index_cols is not None else self._agg_cols
        index_cols = self._restrict_cols(index_cols, df)
        sub_group_cols = self._restrict_cols(self._sub_group_cols, df)

        def process_subgroup(df, group, subgroup):
            subgroup = FrozenDict(subgroup)

            try:
                ref = subref[subgroup]
            except KeyError:
                return None

            group = {**group, **subgroup}

            # Make sure that the columns/index levels relative to the group are
            # removed, since they are useless because they have a constant value
            def remove_cols(df):
                to_remove = group.keys()
                df = df.drop(columns=self._restrict_cols(to_remove, df))
                try:
                    drop_level = df.index.droplevel
                except AttributeError:
                    pass
                else:
                    df.index = drop_level(
                        sorted(set(df.index.names) & set(to_remove)))
                return df

            df = remove_cols(df)
            ref = remove_cols(ref)

            df = func(ref, df, group)
            # Only assign back subgroup columns if they have not been set by
            # the callback directly.
            to_assign = group.keys() - set(df.columns)
            df = df.assign(
                **{col: val
                   for col, val in group.items() if col in to_assign})

            # Drop RangeIndex to avoid getting an "index" column that is
            # useless
            drop_index = isinstance(df.index, pd.RangeIndex)
            df.reset_index(drop=drop_index, inplace=True)
            return df

        # Groups as asked by the user
        comparison_groups = {
            FrozenDict(group): df.set_index(index_cols)
            for group, df in df_split_signals(df, ref_group.keys())
        }

        # We elect a comparison reference and split it into subgroups
        ref = comparison_groups[ref_group]
        subref = {
            FrozenDict(subgroup): subdf
            for subgroup, subdf in df_split_signals(ref, sub_group_cols)
        }

        # For each group, split it further into subgroups
        dfs = [
            process_subgroup(subdf, group, subgroup)
            for group, df in comparison_groups.items()
            for subgroup, subdf in df_split_signals(df, sub_group_cols)
        ]
        df = pd.concat((df for df in dfs if df is not None),
                       ignore_index=True,
                       copy=False)

        if melt:
            df = self._melt(df)

        return df
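
The melt=True behaviour described in the docstring maps onto pandas.DataFrame.melt(); an illustration with toy columns (the exact column names used by self._melt() are assumptions here):

import pandas as pd

df = pd.DataFrame({
    'board': ['a', 'a'],
    'mean': [1.0, 2.0],
    'std': [0.1, 0.2],
})

# Turn the extra value columns into rows, copying each column name
# into a 'stat' column, as described for melt=True above.
melted = df.melt(id_vars=['board'], var_name='stat', value_name='value')
print(melted)
#   board  stat  value
# 0     a  mean    1.0
# 1     a  mean    2.0
# 2     a   std    0.1
# 3     a   std    0.2
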