def _df_tasks_states(self, tasks=None, return_one_df=False):
    """
    Compute tasks states for all tasks.

    :param tasks: If specified, states of these tasks only will be yielded.
        The :class:`lisa.trace.TaskID` must have a ``pid`` field specified,
        since the task state is per-PID.
    :type tasks: list(lisa.trace.TaskID) or list(int)

    :param return_one_df: If ``True``, a single dataframe is returned with
        new extra columns. If ``False``, a generator is returned that yields
        tuples of ``(TaskID, task_df)``. Each ``task_df`` contains the new
        columns.
    :type return_one_df: bool
    """
    ######################################################
    # A) Assemble the sched_switch and sched_wakeup events
    ######################################################

    wk_df = self.trace.df_event('sched_wakeup')
    sw_df = self.trace.df_event('sched_switch')

    try:
        wkn_df = self.trace.df_event('sched_wakeup_new')
    except MissingTraceEventError:
        pass
    else:
        wk_df = pd.concat([wk_df, wkn_df])

    wk_df = wk_df[["pid", "comm", "target_cpu", "__cpu"]].copy(deep=False)
    wk_df["curr_state"] = TaskState.TASK_WAKING

    prev_sw_df = sw_df[["__cpu", "prev_pid", "prev_state", "prev_comm"]].copy()
    next_sw_df = sw_df[["__cpu", "next_pid", "next_comm"]].copy()

    prev_sw_df.rename(
        columns={
            "prev_pid": "pid",
            "prev_state": "curr_state",
            "prev_comm": "comm",
        },
        inplace=True,
    )

    next_sw_df["curr_state"] = TaskState.TASK_ACTIVE
    next_sw_df.rename(
        columns={
            'next_pid': 'pid',
            'next_comm': 'comm',
        },
        inplace=True,
    )

    all_sw_df = pd.concat([prev_sw_df, next_sw_df], sort=False)

    # Integer values are preferred here, otherwise the whole column is
    # converted to float64
    all_sw_df['target_cpu'] = -1

    df = pd.concat([all_sw_df, wk_df], sort=False)
    df.sort_index(inplace=True)
    df.rename(columns={'__cpu': 'cpu'}, inplace=True)

    # Restrict the set of data we will process to a given set of tasks
    if tasks is not None:
        def resolve_task(task):
            """
            Get a TaskID for each task, and only update an existing TaskID if
            it lacks a PID field, since that is what we care about in this
            function.
            """
            try:
                do_update = task.pid is None
            except AttributeError:
                do_update = False

            return self.trace.get_task_id(task, update=do_update)

        tasks = list(map(resolve_task, tasks))
        df = df_filter_task_ids(df, tasks)

    # Return a unique dataframe with new columns added
    if return_one_df:
        df.sort_index(inplace=True)
        df.index.name = 'Time'
        df.reset_index(inplace=True)

        # Since sched_switch is split in two dataframes (next and prev), we
        # end up with duplicated indices. Avoid that by incrementing them by
        # the minimum amount possible.
        df = df_update_duplicates(df, col='Time', inplace=True)

        grouped = df.groupby('pid', observed=True, sort=False)
        new_columns = dict(
            next_state=grouped['curr_state'].shift(
                -1, fill_value=TaskState.TASK_UNKNOWN),
            # GroupBy.transform() will run the function on each group, and
            # concatenate the resulting series to create a new column.
            # Note: we actually need transform() to chain 2 operations on the
            # group, otherwise the first operation returns a final Series,
            # and the 2nd is not applied on groups
            delta=grouped['Time'].transform(
                lambda time: time.diff().shift(-1)),
        )
        df = df.assign(**new_columns)
        df.set_index('Time', inplace=True)

        return df

    # Return a generator yielding (TaskID, task_df) tuples
    else:
        def make_pid_df(pid_df):
            # Even though the initial dataframe contains duplicated indices
            # due to using both prev_pid and next_pid in the sched_switch
            # event, we should never end up with prev_pid == next_pid, so
            # task-specific dataframes are expected to be free from
            # duplicated timestamps.
            # assert not pid_df.index.duplicated().any()

            # Copy the df to add new columns
            pid_df = pid_df.copy(deep=False)

            # For each PID, add the time it spent in each state
            pid_df['delta'] = pid_df.index.to_series().diff().shift(-1)
            pid_df['next_state'] = pid_df['curr_state'].shift(
                -1, fill_value=TaskState.TASK_UNKNOWN)
            return pid_df

        signals = df_split_signals(df, ['pid'])
        return (
            (TaskID(pid=col['pid'], comm=None), make_pid_df(pid_df))
            for col, pid_df in signals
        )
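# A rough usage sketch for the helper above (hedged: ``ana`` and the PID are
# hypothetical placeholders, and this private method is normally reached
# through the public task-state accessors of the analysis object):
#
#     # Single dataframe indexed by Time, with 'delta' and 'next_state'
#     # columns added for every row
#     df = ana._df_tasks_states(tasks=[1234], return_one_df=True)
#
#     # Generator of (TaskID, task_df) tuples, one per PID
#     for task_id, task_df in ana._df_tasks_states(tasks=[1234]):
#         active = task_df[task_df['curr_state'] == TaskState.TASK_ACTIVE]
#         print(task_id, active['delta'].sum())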
def _df_all_events(self, events, field_sep=' ', fields_as_cols=None,
                   event_as_col=True):
    """
    Split implementation to be able to use the cache.
    """
    if fields_as_cols is None:
        fields_as_cols = ['__comm', '__pid', '__cpu']
    else:
        fields_as_cols = list(fields_as_cols)

    trace = self.trace

    if not events:
        df = pd.DataFrame(
            dict.fromkeys(
                ['info'] + fields_as_cols + (
                    ['event'] if event_as_col else []
                ),
                []
            )
        )
    else:
        if event_as_col:
            fmt = '{fields}'
        else:
            fmt = '{{event:<{max_len}}}: {{fields}}'.format(
                max_len=max(len(event) for event in events)
            )

        fields_as_cols_set = set(fields_as_cols)

        def make_info_row(row, event):
            fields = field_sep.join(
                f'{key}={value}'
                for key, value in row.items()
                if key not in fields_as_cols_set
            )
            return fmt.format(
                event=event,
                fields=fields,
            )

        def make_info_df(event):
            df = trace.df_event(event)
            df = pd.DataFrame(
                {
                    'info': df.apply(make_info_row, axis=1, event=event),
                    **{
                        field: df[field]
                        for field in fields_as_cols
                    },
                },
                index=df.index,
            )

            if event_as_col:
                df['event'] = event

            return df

        df = pd.concat(map(make_info_df, events))
        df.sort_index(inplace=True)
        df_update_duplicates(df, inplace=True)

    # Reorder the columns to provide a better kernelshark-like display
    columns_order = (
        [col for col in df.columns if col.startswith('__')] +
        (['event'] if event_as_col else []) +
        ['info']
    )
    df = df[order_as(df.columns, columns_order)]

    df.attrs['name'] = 'events'
    return df
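# A rough usage sketch for the helper above (hedged: ``ana`` is a hypothetical
# analysis object; in practice this method is meant to be called through
# whatever public, cached wrapper builds on it):
#
#     # Kernelshark-like view mixing two events: '__cpu' kept as a column,
#     # the event name in an 'event' column, and the remaining fields
#     # rendered as "key=value" pairs in the 'info' column
#     df = ana._df_all_events(
#         ['sched_switch', 'sched_wakeup'],
#         fields_as_cols=['__cpu'],
#         event_as_col=True,
#     )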