def _df_tasks_states(self, tasks=None, return_one_df=False):
    """
    Compute tasks states for all tasks.

    :param tasks: If specified, states of these tasks only will be yielded.
        The :class:`lisa.trace.TaskID` must have a ``pid`` field specified,
        since the task state is per-PID.
    :type tasks: list(lisa.trace.TaskID) or list(int)

    :param return_one_df: If ``True``, a single dataframe is returned with
        new extra columns. If ``False``, a generator is returned that yields
        tuples of ``(TaskID, task_df)``. Each ``task_df`` contains the new
        columns.
    :type return_one_df: bool
    """
    ######################################################
    # A) Assemble the sched_switch and sched_wakeup events
    ######################################################

    wk_df = self.trace.df_event('sched_wakeup')
    sw_df = self.trace.df_event('sched_switch')

    try:
        wkn_df = self.trace.df_event('sched_wakeup_new')
    except MissingTraceEventError:
        pass
    else:
        wk_df = pd.concat([wk_df, wkn_df])

    wk_df = wk_df[["pid", "comm", "target_cpu", "__cpu"]].copy(deep=False)
    wk_df["curr_state"] = TaskState.TASK_WAKING

    prev_sw_df = sw_df[["__cpu", "prev_pid", "prev_state", "prev_comm"]].copy()
    next_sw_df = sw_df[["__cpu", "next_pid", "next_comm"]].copy()

    prev_sw_df.rename(
        columns={
            "prev_pid": "pid",
            "prev_state": "curr_state",
            "prev_comm": "comm",
        },
        inplace=True,
    )

    next_sw_df["curr_state"] = TaskState.TASK_ACTIVE
    next_sw_df.rename(
        columns={
            'next_pid': 'pid',
            'next_comm': 'comm',
        },
        inplace=True,
    )

    all_sw_df = pd.concat([prev_sw_df, next_sw_df], sort=False)

    # Integer values are preferred here, otherwise the whole column is
    # converted to float64
    all_sw_df['target_cpu'] = -1

    df = pd.concat([all_sw_df, wk_df], sort=False)
    df.sort_index(inplace=True)
    df.rename(columns={'__cpu': 'cpu'}, inplace=True)

    # Restrict the set of data we will process to a given set of tasks
    if tasks is not None:
        def resolve_task(task):
            """
            Get a TaskID for each task, and only update an existing TaskID if
            it lacks a PID field, since that is what we care about in this
            function.
            """
            try:
                do_update = task.pid is None
            except AttributeError:
                do_update = False

            return self.trace.get_task_id(task, update=do_update)

        tasks = list(map(resolve_task, tasks))
        df = df_filter_task_ids(df, tasks)

    # Return a unique dataframe with new columns added
    if return_one_df:
        df.sort_index(inplace=True)
        df.index.name = 'Time'
        df.reset_index(inplace=True)

        # Since sched_switch is split in two dataframes (next and prev), we
        # end up with duplicated indices. Avoid that by incrementing them by
        # the minimum amount possible.
        df = df_update_duplicates(df, col='Time', inplace=True)

        grouped = df.groupby('pid', observed=True, sort=False)
        new_columns = dict(
            next_state=grouped['curr_state'].shift(
                -1, fill_value=TaskState.TASK_UNKNOWN),
            # GroupBy.transform() will run the function on each group, and
            # concatenate the resulting series to create a new column.
            # Note: we actually need transform() to chain 2 operations on the
            # group, otherwise the first operation returns a final Series,
            # and the 2nd is not applied on groups
            delta=grouped['Time'].transform(
                lambda time: time.diff().shift(-1)),
        )
        df = df.assign(**new_columns)
        df.set_index('Time', inplace=True)

        return df

    # Return a generator yielding (TaskID, task_df) tuples
    else:
        def make_pid_df(pid_df):
            # Even though the initial dataframe contains duplicated indices
            # due to using both prev_pid and next_pid in the sched_switch
            # event, we should never end up with prev_pid == next_pid, so
            # task-specific dataframes are expected to be free from
            # duplicated timestamps.
            # assert not pid_df.index.duplicated().any()

            # Copy the df to add new columns
            pid_df = pid_df.copy(deep=False)

            # For each PID, add the time it spent in each state
            pid_df['delta'] = pid_df.index.to_series().diff().shift(-1)
            pid_df['next_state'] = pid_df['curr_state'].shift(
                -1, fill_value=TaskState.TASK_UNKNOWN)
            return pid_df

        signals = df_split_signals(df, ['pid'])
        return (
            (TaskID(pid=col['pid'], comm=None), make_pid_df(pid_df))
            for col, pid_df in signals
        )
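# A rough usage sketch for the helper above (hedged: ``ana`` and the PID are
# hypothetical placeholders, and this private method is normally reached
# through the public task-state accessors of the analysis object):
#
#     # Single dataframe indexed by Time, with 'delta' and 'next_state'
#     # columns added for every row
#     df = ana._df_tasks_states(tasks=[1234], return_one_df=True)
#
#     # Generator of (TaskID, task_df) tuples, one per PID
#     for task_id, task_df in ana._df_tasks_states(tasks=[1234]):
#         active = task_df[task_df['curr_state'] == TaskState.TASK_ACTIVE]
#         print(task_id, active['delta'].sum())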
def _df_all_events(self, events, field_sep=' ', fields_as_cols=None,
                   event_as_col=True):
    """
    Split implementation to be able to use the cache.
    """
    if fields_as_cols is None:
        fields_as_cols = ['__comm', '__pid', '__cpu']
    else:
        fields_as_cols = list(fields_as_cols)

    trace = self.trace

    if not events:
        df = pd.DataFrame(
            dict.fromkeys(
                ['info'] + fields_as_cols + (
                    ['event'] if event_as_col else []
                ),
                []
            )
        )
    else:
        if event_as_col:
            fmt = '{fields}'
        else:
            fmt = '{{event:<{max_len}}}: {{fields}}'.format(
                max_len=max(len(event) for event in events)
            )

        fields_as_cols_set = set(fields_as_cols)

        def make_info_row(row, event):
            fields = field_sep.join(
                f'{key}={value}'
                for key, value in row.items()
                if key not in fields_as_cols_set
            )
            return fmt.format(
                event=event,
                fields=fields,
            )

        def make_info_df(event):
            df = trace.df_event(event)
            df = pd.DataFrame(
                {
                    'info': df.apply(make_info_row, axis=1, event=event),
                    **{
                        field: df[field]
                        for field in fields_as_cols
                    },
                },
                index=df.index,
            )

            if event_as_col:
                df['event'] = event

            return df

        df = pd.concat(map(make_info_df, events))
        df.sort_index(inplace=True)
        df_update_duplicates(df, inplace=True)

    # Reorder the columns to provide a better kernelshark-like display
    columns_order = (
        [col for col in df.columns if col.startswith('__')] +
        (['event'] if event_as_col else []) +
        ['info']
    )
    df = df[order_as(df.columns, columns_order)]

    df.attrs['name'] = 'events'
    return df
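# A rough usage sketch for the helper above (hedged: ``ana`` is a hypothetical
# analysis object; in practice this method is meant to be called through
# whatever public, cached wrapper builds on it):
#
#     # Kernelshark-like view mixing two events: '__cpu' kept as a column,
#     # the event name in an 'event' column, and the remaining fields
#     # rendered as "key=value" pairs in the 'info' column
#     df = ana._df_all_events(
#         ['sched_switch', 'sched_wakeup'],
#         fields_as_cols=['__cpu'],
#         event_as_col=True,
#     )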