Python df_filter Examples, lisa.datautils.df_filter Python Examples

Example #1

0

Show file

    def get_df(self, remove_ref=None, compare=None):
        """
        Build the :class:`pandas.DataFrame` holding the statistics.

        :param compare: See :class:`Stats` ``compare`` parameter. When left to
            ``None``, the value given to :class:`Stats` is used.
        :type compare: bool or None

        :param remove_ref: When ``True``, the rows belonging to the reference
            group described by ``ref_group`` for this object are filtered out
            of the returned dataframe. When left to ``None``, it takes the
            same value as ``compare``.
        :type remove_ref: bool or None
        """
        # Resolve the defaults: "compare" falls back on the instance setting,
        # and "remove_ref" follows the resolved value of "compare".
        if compare is None:
            compare = self._compare
        if remove_ref is None:
            remove_ref = compare

        stats_df = self._df_stats_test(self._df_stats())

        if compare:
            stats_df = self._df_compare_pct(stats_df)

        if remove_ref:
            stats_df = df_filter(stats_df, self._ref_group, exclude=True)

        return self._df_format(stats_df)

Example #2

0

Show file

File: notebook.py Project: lukaszluba-arm/lisa

    def plot_event_field(self,
                         event: str,
                         field: str,
                         axis,
                         local_fig,
                         filter_columns=None,
                         filter_f=None):
        """
        Plot the filtered values of one field of an event as a step signal.

        :param event: FTrace event name of interest.
        :type event: str

        :param field: Name of the field of ``event`` to plot.
        :type field: str

        :param axis: Axis the signal is drawn on. It is forwarded as ``ax`` to
            the dataframe ``plot`` method.

        :param local_fig: Not used by this method.

        :param filter_columns: Pre-filter the dataframe using
            :func:`lisa.datautils.df_filter`. The sorted column names are also
            used to build the signal description passed to
            :meth:`lisa.trace.Trace.df_event`.
        :type filter_columns: dict or None

        :param filter_f: Function used to filter the dataframe of the event.
            It must take a dataframe as only parameter and return a filtered
            dataframe. It is applied after the ``filter_columns`` filter.
        :type filter_f: collections.abc.Callable
        """
        trace = self.trace

        # Without column filters, no signal description is passed along
        signals = (
            [SignalDesc(event, sorted(filter_columns.keys()))]
            if filter_columns
            else None
        )
        event_df = trace.df_event(event, signals=signals)

        # Column-based filtering first, then the user-provided callable
        if filter_columns:
            event_df = df_filter(event_df, filter_columns)
        if filter_f:
            event_df = filter_f(event_df)

        event_df = df_refit_index(event_df, window=trace.window)

        signal_df = event_df[[field]]
        signal_df.plot(ax=axis, drawstyle='steps-post')

Example #3

0

Show file

File: notebook.py Project: Binse-Park/lisa-1

    def plot_event_field(self,
                         event,
                         field,
                         axis,
                         local_fig,
                         filter_columns=None,
                         filter_f=None):
        """
        Plot the filtered values of one field of an event as a step signal.

        :param event: FTrace event name of interest.
        :type event: str

        :param field: Name of the field of ``event`` to plot.
        :type field: str

        :param axis: Axis the signal is drawn on. It is forwarded as ``ax`` to
            the dataframe ``plot`` method.

        :param local_fig: Not used by this method.

        :param filter_columns: Pre-filter the dataframe using
            :func:`lisa.datautils.df_filter`
        :type filter_columns: dict or None

        :param filter_f: Function used to filter the dataframe of the event.
            It must take a dataframe as only parameter and return a filtered
            dataframe. It is applied after the ``filter_columns`` filter.
        :type filter_f: collections.abc.Callable
        """
        trace = self.trace
        event_df = trace.df_events(event)

        # Column-based filtering first, then the user-provided callable
        if filter_columns:
            event_df = df_filter(event_df, filter_columns)
        if filter_f:
            event_df = filter_f(event_df)

        # Refit the index on the boundaries of the trace before plotting
        event_df = df_refit_index(event_df, trace.start, trace.end)

        signal_df = event_df[[field]]
        signal_df.plot(ax=axis, drawstyle='steps-post')

Example #4

0

Show file

    def __init__(
        self,
        df,
        value_col='value',
        ref_group=None,
        filter_rows=None,
        compare=True,
        agg_cols=None,
        mean_ci_confidence=None,
        stats=None,
        stat_col='stat',
        unit_col='unit',
        ci_cols=('ci_minus', 'ci_plus'),
        control_var_col='fixed',
        mean_kind_col='mean_kind',
        non_normalizable_units={
            unit.name
            for unit in _STATS_UNIT.values() if not unit.normalizable
        },
    ):
        """
        Classify the columns of ``df`` (tags, group, aggregation, ...) and
        store the resulting configuration on the instance.

        :param df: Dataframe holding the values along with their tag columns.
        :type df: pandas.DataFrame

        :param value_col: Name of the column holding the values. It is neither
            a tag column nor an aggregation column.
        :type value_col: str

        :param ref_group: Mapping of column names to values describing the
            reference group. All its keys are used as group columns, but only
            the entries with a value that is not ``None`` are kept in the
            stored reference group.
        :type ref_group: dict or None

        :param filter_rows: If truthy, ``df`` is first filtered with
            ``df_filter(df, filter_rows)``.
        :type filter_rows: dict or None

        :param compare: Comparison is only enabled if this is truthy and the
            reference group ends up non-empty.
        :type compare: bool

        :param agg_cols: Columns to aggregate over. If not provided, it
            defaults to ``['iteration']`` if ``df`` has such a column,
            otherwise to all the tag columns that are not group columns.
            ``value_col`` and ``unit_col`` are always removed from it.
        :type agg_cols: list(str) or None

        :param mean_ci_confidence: Stored as-is, defaults to ``0.95`` if
            ``None``.
        :type mean_ci_confidence: float or None

        :param stats: Mapping of statistics to use. If falsy, it defaults to
            ``median``, ``count``, ``mean`` and ``std``, all mapped to
            ``None``.
        :type stats: dict or None

        :param stat_col: Column name stored for later use, not otherwise used
            in this method.
        :type stat_col: str

        :param unit_col: Name of the unit column. It is always part of the tag
            columns and never part of the aggregation columns.
        :type unit_col: str

        :param ci_cols: Names of columns that are excluded from the tag
            columns.
        :type ci_cols: tuple(str)

        :param control_var_col: Name of a column controlling the behavior of
            this class, excluded from the tag columns.
        :type control_var_col: str

        :param mean_kind_col: Name of a column controlling the behavior of
            this class, excluded from the tag columns.
        :type mean_kind_col: str

        :param non_normalizable_units: Stored as-is. The default is the set of
            names of the units in ``_STATS_UNIT`` that are not normalizable.
            It is built once, when the method is defined.
        :type non_normalizable_units: set(str)

        :raises ValueError: If two equal columns have different values in
            ``ref_group``, if no aggregation column can be inferred, or if the
            set of aggregation columns ends up empty.
        """
        if filter_rows:
            df = df_filter(df, filter_rows)

        # All the keys are group columns, including the ones with a None
        # value. Only the entries with an actual value make it to the
        # reference group. This also builds a new dict, so the mapping passed
        # by the caller is not modified later on.
        ref_group = ref_group or {}
        group_cols = list(ref_group.keys())
        ref_group = {k: v for k, v in ref_group.items() if v is not None}

        # Columns controlling the behavior of this class, but that are not tags
        # nor values
        tweak_cols = {mean_kind_col, control_var_col}

        # Every remaining column is a tag, and the unit column always is one
        tag_cols = sorted((set(df.columns) - {value_col, *ci_cols} -
                           tweak_cols) | {unit_col})

        # TODO: see if the grouping machinery can be changed to accommodate redundant tags
        # Having duplicate tags will break various grouping mechanisms, so we
        # need to get rid of them
        # Iterate over a copy, since tag_cols is modified inside the loop
        for col1, col2 in combinations(tag_cols.copy(), 2):
            try:
                if (df[col1] == df[col2]).all():
                    # Prefer dropping a column that is not part of the
                    # reference group
                    if col1 not in ref_group:
                        to_remove = col1
                    elif col2 not in ref_group:
                        to_remove = col2
                    # Both are in the reference group with the same value, so
                    # one of the two entries is redundant
                    elif ref_group[col1] == ref_group[col2]:
                        to_remove = col2
                        ref_group.pop(to_remove)
                    else:
                        raise ValueError(
                            f'ref_group has different values for "{col1}" and "{col2}" but the columns are equal'
                        )

                    df = df.drop(columns=[to_remove])
                else:
                    to_remove = None
            # Presumably raised when one of the columns is not in df, e.g.
            # because it has been dropped by a previous iteration
            except KeyError:
                pass
            else:
                if to_remove is not None:
                    try:
                        tag_cols.remove(to_remove)
                    # The column was already removed from the tags
                    except ValueError:
                        pass

        if agg_cols:
            pass
        # Default to "iteration" if there was no ref group nor columns to
        # aggregate over
        elif 'iteration' in df.columns:
            agg_cols = ['iteration']
        # Aggregate over all tags that are not part of the ref group, since the
        # ref group keys are the tags that will remain after aggregation
        elif group_cols:
            agg_cols = sorted(set(tag_cols) - set(group_cols))
        else:
            raise ValueError(
                'No aggregation column can be inferred. Either pass a ref_group or agg_cols'
            )

        # The value and unit columns cannot be aggregated over
        agg_cols = sorted(set(agg_cols) - {value_col, unit_col})
        if not agg_cols:
            raise ValueError(
                'No aggregation columns have been selected, ensure that each special column has only one use'
            )

        # Ultimately, the tags we want to have in the stat dataframe will not
        # include the one we aggregated over
        stat_tag_cols = [tag for tag in tag_cols if tag not in agg_cols]

        # Sub groups that allows treating tag columns that are not part of
        # the group not as an aggregation column
        sub_group_cols = set(stat_tag_cols) - set(group_cols)
        plot_group_cols = sub_group_cols - {unit_col}

        self._orig_df = df
        self._stats = stats or {
            'median': None,
            'count': None,
            # This one is custom and not from pandas
            'mean': None,
            'std': None,
        }
        self._ref_group = ref_group
        self._group_cols = group_cols
        # Comparison is only enabled if the reference group is not empty
        self._compare = compare and bool(ref_group)
        self._val_col = value_col
        self._tag_cols = tag_cols
        self._stat_tag_cols = stat_tag_cols
        self._sub_group_cols = sub_group_cols
        self._plot_group_cols = plot_group_cols
        self._agg_cols = agg_cols
        self._stat_col = stat_col
        self._mean_kind_col = mean_kind_col
        self._mean_ci_confidence = 0.95 if mean_ci_confidence is None else mean_ci_confidence
        self._unit_col = unit_col
        self._control_var_col = control_var_col
        self._tweak_cols = tweak_cols
        self._ci_cols = ci_cols
        self._non_normalizable_units = non_normalizable_units