コード例 #1
0
    def __init__(self,
                 obj,
                 groupby_obj=None,
                 keys=None,
                 axis=0,
                 level=None,
                 grouper=None,
                 exclusions=None,
                 selection=None,
                 as_index=True,
                 sort=True,
                 group_keys=True,
                 squeeze=False,
                 observed=False,
                 mutated=False,
                 grouper_cache=None):
        def fill_value(v, key):
            return v if v is not None or groupby_obj is None else getattr(
                groupby_obj, key)

        self.obj = obj
        self.keys = fill_value(keys, 'keys')
        self.axis = fill_value(axis, 'axis')
        self.level = fill_value(level, 'level')
        self.exclusions = fill_value(exclusions, 'exclusions')
        self.selection = selection
        self.as_index = fill_value(as_index, 'as_index')
        self.sort = fill_value(sort, 'sort')
        self.group_keys = fill_value(group_keys, 'group_keys')
        self.squeeze = fill_value(squeeze, 'squeeze')
        self.observed = fill_value(observed, 'observed')
        self.mutated = fill_value(mutated, 'mutated')

        if groupby_obj is None:
            groupby_kw = dict(keys=keys,
                              axis=axis,
                              level=level,
                              grouper=grouper,
                              exclusions=exclusions,
                              as_index=as_index,
                              group_keys=group_keys,
                              squeeze=squeeze,
                              observed=observed,
                              mutated=mutated)
            if not _HAS_SQUEEZE:  # pragma: no branch
                groupby_kw.pop('squeeze')

            if obj.ndim == 2:
                self.groupby_obj = DataFrameGroupBy(obj, **groupby_kw)
            else:
                self.groupby_obj = SeriesGroupBy(obj, **groupby_kw)
        else:
            self.groupby_obj = groupby_obj

        if grouper_cache:
            self.groupby_obj.grouper._cache = grouper_cache
        if selection:
            self.groupby_obj = self.groupby_obj[selection]

        self.is_frame = isinstance(self.groupby_obj, DataFrameGroupBy)
コード例 #2
0
 def _merge(self, gby: SeriesGroupBy) -> pd.Series:
     """
     Merges together the graph-like objects associated to the events of one progressive chunk.
     """
     index = []
     graphs = []
     for i in gby.groups.keys():
         index.append(i)
         df_group = gby.get_group(i)
         if isinstance(df_group.iloc[0], str):
             graphs.append(df_group.iloc[0])
         else:
             graphs.append(reduce(self._graph_space.merge, df_group))
     return pd.Series(graphs, index=index)
コード例 #3
0
class GroupByWrapper:
    """Hold a pandas object together with the groupby built on top of it.

    The wrapper remembers the arguments the groupby was created with so it
    can be flattened with :meth:`to_tuple` and rebuilt with
    :meth:`from_tuple`. Public attributes that the wrapper does not define
    are looked up on the wrapped groupby object.
    """

    def __init__(self,
                 obj,
                 groupby_obj=None,
                 keys=None,
                 axis=0,
                 level=None,
                 grouper=None,
                 exclusions=None,
                 selection=None,
                 as_index=True,
                 sort=True,
                 group_keys=True,
                 squeeze=False,
                 observed=False,
                 mutated=False,
                 grouper_cache=None):
        """Wrap ``obj`` and either build a groupby on it or adopt ``groupby_obj``.

        Parameters
        ----------
        obj
            Object being grouped. ``obj.ndim == 2`` selects
            ``DataFrameGroupBy``; anything else uses ``SeriesGroupBy``.
        groupby_obj
            Existing groupby object to wrap. When given, no new groupby is
            built and arguments passed as ``None`` are read from it.
        keys, axis, level, grouper, exclusions, as_index, sort, group_keys, \
squeeze, observed, mutated
            Stored on the wrapper and, when ``groupby_obj`` is ``None``,
            forwarded to the groupby constructor.
        selection
            Column label, or collection of labels, applied to the groupby via
            ``groupby_obj[selection]``. ``None`` or an empty collection means
            no selection.
        grouper_cache
            When non-empty, assigned to ``groupby_obj.grouper._cache``.
        """
        def fill_value(v, key):
            # Only arguments left as ``None`` are taken from ``groupby_obj``;
            # arguments whose default is not ``None`` (``axis``, ``as_index``,
            # ``sort`` ...) always keep the value that was passed in.
            return v if v is not None or groupby_obj is None else getattr(
                groupby_obj, key)

        self.obj = obj
        self.keys = fill_value(keys, 'keys')
        self.axis = fill_value(axis, 'axis')
        self.level = fill_value(level, 'level')
        self.exclusions = fill_value(exclusions, 'exclusions')
        self.selection = selection
        self.as_index = fill_value(as_index, 'as_index')
        self.sort = fill_value(sort, 'sort')
        self.group_keys = fill_value(group_keys, 'group_keys')
        self.squeeze = fill_value(squeeze, 'squeeze')
        self.observed = fill_value(observed, 'observed')
        self.mutated = fill_value(mutated, 'mutated')

        if groupby_obj is None:
            # ``sort`` is forwarded as well so that ``sort=False`` is honoured
            # by the groupby that is built here.
            groupby_kw = dict(keys=keys,
                              axis=axis,
                              level=level,
                              grouper=grouper,
                              exclusions=exclusions,
                              as_index=as_index,
                              sort=sort,
                              group_keys=group_keys,
                              squeeze=squeeze,
                              observed=observed,
                              mutated=mutated)
            # NOTE(review): _HAS_SQUEEZE is defined elsewhere; presumably it
            # tells whether the installed pandas accepts ``squeeze`` - confirm.
            if not _HAS_SQUEEZE:  # pragma: no branch
                groupby_kw.pop('squeeze')

            if obj.ndim == 2:
                self.groupby_obj = DataFrameGroupBy(obj, **groupby_kw)
            else:
                self.groupby_obj = SeriesGroupBy(obj, **groupby_kw)
        else:
            self.groupby_obj = groupby_obj

        if grouper_cache:
            self.groupby_obj.grouper._cache = grouper_cache
        # Tested through the normalised label list rather than truthiness, so
        # that a falsy label such as column ``0`` is still applied.
        if self._selection_labels(selection):
            self.groupby_obj = self.groupby_obj[selection]

        self.is_frame = isinstance(self.groupby_obj, DataFrameGroupBy)

    @staticmethod
    def _selection_labels(selection):
        """Return ``selection`` as a list of labels (empty when nothing is selected).

        ``None`` yields ``[]``, a non-string iterable is listed element by
        element, and any other value is treated as one single label.
        """
        if selection is None:
            return []
        if isinstance(selection, Iterable) and not isinstance(selection, str):
            return list(selection)
        return [selection]

    def _column_keys(self):
        """Return the grouping keys that are column labels of ``self.obj``."""
        candidates = self.keys if isinstance(self.keys, list) else [self.keys]
        columns = self.obj.columns
        found = []
        for key in candidates:
            try:
                if key in columns:
                    found.append(key)
            except TypeError:
                # Unhashable keys (Series, arrays, dicts ...) cannot be
                # column labels, so they are skipped.
                continue
        return found

    def __getitem__(self, item):
        """Return a new wrapper over the same object with ``item`` selected."""
        return GroupByWrapper(self.obj,
                              keys=self.keys,
                              axis=self.axis,
                              level=self.level,
                              grouper=self.groupby_obj.grouper,
                              exclusions=self.exclusions,
                              selection=item,
                              as_index=self.as_index,
                              sort=self.sort,
                              group_keys=self.group_keys,
                              squeeze=self.squeeze,
                              observed=self.observed,
                              mutated=self.mutated)

    def __getattr__(self, item):
        """Resolve unknown attributes: column names select, the rest delegate."""
        # Private/dunder names are never delegated; the normal lookup raises
        # AttributeError for them.
        if item.startswith('_'):  # pragma: no cover
            return object.__getattribute__(self, item)
        if item in getattr(self.obj, 'columns', ()):
            return self.__getitem__(item)
        return getattr(self.groupby_obj, item)

    def __iter__(self):
        """Iterate over the wrapped groupby object."""
        return self.groupby_obj.__iter__()

    def __sizeof__(self):
        """Return ``sys.getsizeof`` of the object plus that of the grouper cache."""
        return sys.getsizeof(self.obj) \
            + sys.getsizeof(getattr(self.groupby_obj.grouper, '_cache', None))

    @property
    def empty(self):
        """Whether the wrapped object is empty."""
        return self.obj.empty

    @property
    def shape(self):
        """Shape of the grouped object, with the column count of the selection.

        For a frame groupby with a selection, the second dimension is the
        number of selected labels; a single string label counts as one column.
        """
        shape = list(self.groupby_obj.obj.shape)
        labels = self._selection_labels(self.selection)
        if self.is_frame and labels:
            shape[1] = len(labels)
        return tuple(shape)

    def to_tuple(self, truncate=False, pickle_function=False):
        """Flatten the wrapper into the tuple accepted by :meth:`from_tuple`.

        Parameters
        ----------
        truncate
            When true and a selection exists, the object is reduced to the
            selected columns plus the grouping keys that are columns.
        pickle_function
            When true and ``keys`` is callable, ``keys`` is serialized with
            ``cloudpickle.dumps``.
        """
        labels = self._selection_labels(self.selection)
        if labels and truncate:
            chosen = set(labels)
            all_items = labels + [
                k for k in self._column_keys() if k not in chosen
            ]
            # Skip the column indexing when every column is needed anyway.
            if set(all_items) == set(self.obj.columns):
                obj = self.obj
            else:
                obj = self.obj[all_items]
        else:
            obj = self.obj

        if pickle_function and callable(self.keys):
            keys = cloudpickle.dumps(self.keys)
        else:
            keys = self.keys

        return obj, keys, self.axis, self.level, self.exclusions, self.selection, \
            self.as_index, self.sort, self.group_keys, self.squeeze, self.observed, \
            self.mutated, getattr(self.groupby_obj.grouper, '_cache', dict())

    @classmethod
    def from_tuple(cls, tp):
        """Rebuild a wrapper from a tuple produced by :meth:`to_tuple`."""
        obj, keys, axis, level, exclusions, selection, as_index, sort, group_keys, squeeze, \
            observed, mutated, grouper_cache = tp

        # SECURITY: byte keys are unpickled, which can execute arbitrary code;
        # only feed this method tuples that come from a trusted source.
        if isinstance(keys, (bytes, bytearray)):
            keys = cloudpickle.loads(keys)

        return cls(obj,
                   keys=keys,
                   axis=axis,
                   level=level,
                   exclusions=exclusions,
                   selection=selection,
                   as_index=as_index,
                   sort=sort,
                   group_keys=group_keys,
                   squeeze=squeeze,
                   observed=observed,
                   mutated=mutated,
                   grouper_cache=grouper_cache)
コード例 #4
0
ファイル: groupby_wrapper.py プロジェクト: mars-project/mars
class GroupByWrapper:
    """Hold a pandas object together with the groupby built on top of it.

    The wrapper remembers the arguments the groupby was created with so it
    can be flattened with :meth:`to_tuple`, rebuilt with :meth:`from_tuple`
    and pickled through :meth:`__reduce__`. Public attributes that the
    wrapper does not define are looked up on the wrapped groupby object.
    """

    def __init__(
        self,
        obj,
        groupby_obj=None,
        keys=None,
        axis=0,
        level=None,
        grouper=None,
        exclusions=None,
        selection=None,
        as_index=True,
        sort=True,
        group_keys=True,
        squeeze=False,
        observed=False,
        mutated=False,
        grouper_cache=None,
    ):
        """Wrap ``obj`` and either build a groupby on it or adopt ``groupby_obj``.

        Parameters
        ----------
        obj
            Object being grouped. ``obj.ndim == 2`` selects
            ``DataFrameGroupBy``; anything else uses ``SeriesGroupBy``.
        groupby_obj
            Existing groupby object to wrap. When given, no new groupby is
            built and arguments passed as ``None`` are read from it.
        keys, axis, level, grouper, exclusions, as_index, sort, group_keys, \
squeeze, observed, mutated
            Stored on the wrapper and, when ``groupby_obj`` is ``None``,
            forwarded to the groupby constructor.
        selection
            Column label, or collection of labels, applied to the groupby via
            ``groupby_obj[selection]``. ``None`` or an empty collection means
            no selection.
        grouper_cache
            When non-empty, assigned to ``groupby_obj.grouper._cache``.
        """

        def fill_value(v, key):
            # Only arguments left as ``None`` are taken from ``groupby_obj``;
            # arguments whose default is not ``None`` (``axis``, ``as_index``,
            # ``sort`` ...) always keep the value that was passed in.
            return (
                v if v is not None or groupby_obj is None else getattr(groupby_obj, key)
            )

        self.obj = obj
        self.keys = fill_value(keys, "keys")
        self.axis = fill_value(axis, "axis")
        self.level = fill_value(level, "level")
        self.exclusions = fill_value(exclusions, "exclusions")
        self.selection = selection
        self.as_index = fill_value(as_index, "as_index")
        self.sort = fill_value(sort, "sort")
        self.group_keys = fill_value(group_keys, "group_keys")
        self.squeeze = fill_value(squeeze, "squeeze")
        self.observed = fill_value(observed, "observed")
        self.mutated = fill_value(mutated, "mutated")

        if groupby_obj is None:
            # ``sort`` is forwarded as well so that ``sort=False`` is honoured
            # by the groupby that is built here.
            groupby_kw = dict(
                keys=keys,
                axis=axis,
                level=level,
                grouper=grouper,
                exclusions=exclusions,
                as_index=as_index,
                sort=sort,
                group_keys=group_keys,
                squeeze=squeeze,
                observed=observed,
                mutated=mutated,
            )
            # NOTE(review): _HAS_SQUEEZE is defined elsewhere; presumably it
            # tells whether the installed pandas accepts ``squeeze`` - confirm.
            if not _HAS_SQUEEZE:  # pragma: no branch
                groupby_kw.pop("squeeze")

            if obj.ndim == 2:
                self.groupby_obj = DataFrameGroupBy(obj, **groupby_kw)
            else:
                self.groupby_obj = SeriesGroupBy(obj, **groupby_kw)
        else:
            self.groupby_obj = groupby_obj

        if grouper_cache:
            self.groupby_obj.grouper._cache = grouper_cache
        # Tested through the normalised label list rather than truthiness, so
        # that a falsy label such as column ``0`` is still applied.
        if self._selection_labels(selection):
            self.groupby_obj = self.groupby_obj[selection]

        self.is_frame = isinstance(self.groupby_obj, DataFrameGroupBy)

    @staticmethod
    def _selection_labels(selection):
        """Return ``selection`` as a list of labels (empty when nothing is selected).

        ``None`` yields ``[]``, a non-string iterable is listed element by
        element, and any other value is treated as one single label.
        """
        if selection is None:
            return []
        if isinstance(selection, Iterable) and not isinstance(selection, str):
            return list(selection)
        return [selection]

    def _column_keys(self):
        """Return the grouping keys that are column labels of ``self.obj``."""
        candidates = self.keys if isinstance(self.keys, list) else [self.keys]
        columns = self.obj.columns
        found = []
        for key in candidates:
            try:
                if key in columns:
                    found.append(key)
            except TypeError:
                # Unhashable keys (Series, arrays, dicts ...) cannot be
                # column labels, so they are skipped.
                continue
        return found

    def __getitem__(self, item):
        """Return a new wrapper over the same object with ``item`` selected."""
        return GroupByWrapper(
            self.obj,
            keys=self.keys,
            axis=self.axis,
            level=self.level,
            grouper=self.groupby_obj.grouper,
            exclusions=self.exclusions,
            selection=item,
            as_index=self.as_index,
            sort=self.sort,
            group_keys=self.group_keys,
            squeeze=self.squeeze,
            observed=self.observed,
            mutated=self.mutated,
        )

    def __getattr__(self, item):
        """Resolve unknown attributes: column names select, the rest delegate."""
        # Private/dunder names are never delegated; the normal lookup raises
        # AttributeError for them.
        if item.startswith("_"):  # pragma: no cover
            return object.__getattribute__(self, item)
        if item in getattr(self.obj, "columns", ()):
            return self.__getitem__(item)
        return getattr(self.groupby_obj, item)

    def __iter__(self):
        """Iterate over the wrapped groupby object."""
        return self.groupby_obj.__iter__()

    def __sizeof__(self):
        """Return ``sys.getsizeof`` of the object plus that of the grouper cache."""
        return sys.getsizeof(self.obj) + sys.getsizeof(
            getattr(self.groupby_obj.grouper, "_cache", None)
        )

    def estimate_size(self):
        """Return ``estimate_pandas_size`` of the object plus that of its index."""
        return estimate_pandas_size(self.obj) + estimate_pandas_size(self.obj.index)

    def __reduce__(self):
        """Pickle as a :meth:`from_tuple` call on the truncated tuple form."""
        return (
            type(self).from_tuple,
            (self.to_tuple(pickle_function=True, truncate=True),),
        )

    def __bool__(self):
        """Return False when any dimension of :attr:`shape` is zero."""
        return bool(np.prod(self.shape))

    @property
    def empty(self):
        """Whether the wrapped object is empty."""
        return self.obj.empty

    @property
    def shape(self):
        """Shape of the grouped object, with the column count of the selection.

        For a frame groupby with a selection, the second dimension is the
        number of selected labels; a single string label counts as one column.
        """
        shape = list(self.groupby_obj.obj.shape)
        labels = self._selection_labels(self.selection)
        if self.is_frame and labels:
            shape[1] = len(labels)
        return tuple(shape)

    @property
    def _selected_obj(self):
        """The ``_selected_obj`` attribute of the wrapped groupby object."""
        return getattr(self.groupby_obj, "_selected_obj")

    def to_tuple(self, truncate=False, pickle_function=False):
        """Flatten the wrapper into the tuple accepted by :meth:`from_tuple`.

        Parameters
        ----------
        truncate
            When true and a selection exists, the object is reduced to the
            selected columns plus the grouping keys that are columns.
        pickle_function
            When true and ``keys`` is callable, ``keys`` is serialized with
            ``cloudpickle.dumps``.
        """
        labels = self._selection_labels(self.selection)
        if labels and truncate:
            chosen = set(labels)
            all_items = labels + [k for k in self._column_keys() if k not in chosen]
            # Skip the column indexing when every column is needed anyway.
            if set(all_items) == set(self.obj.columns):
                obj = self.obj
            else:
                obj = self.obj[all_items]
        else:
            obj = self.obj

        if pickle_function and callable(self.keys):
            keys = cloudpickle.dumps(self.keys)
        else:
            keys = self.keys

        return (
            obj,
            keys,
            self.axis,
            self.level,
            self.exclusions,
            self.selection,
            self.as_index,
            self.sort,
            self.group_keys,
            self.squeeze,
            self.observed,
            self.mutated,
            getattr(self.groupby_obj.grouper, "_cache", dict()),
        )

    @classmethod
    def from_tuple(cls, tp):
        """Rebuild a wrapper from a tuple produced by :meth:`to_tuple`."""
        (
            obj,
            keys,
            axis,
            level,
            exclusions,
            selection,
            as_index,
            sort,
            group_keys,
            squeeze,
            observed,
            mutated,
            grouper_cache,
        ) = tp

        # SECURITY: byte keys are unpickled, which can execute arbitrary code;
        # only feed this method tuples that come from a trusted source.
        if isinstance(keys, (bytes, bytearray)):
            keys = cloudpickle.loads(keys)

        return cls(
            obj,
            keys=keys,
            axis=axis,
            level=level,
            exclusions=exclusions,
            selection=selection,
            as_index=as_index,
            sort=sort,
            group_keys=group_keys,
            squeeze=squeeze,
            observed=observed,
            mutated=mutated,
            grouper_cache=grouper_cache,
        )