예제 #1
0
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: FrameOrSeries | None = None,
        name: Hashable = None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
        dropna: bool = True,
    ):
        """
        Set up a single grouping from ``grouper`` relative to ``index``.

        The constructor arguments are stored on the instance, then
        ``self.grouper`` is normalized depending on what was passed: an index
        level, a ``Grouper`` object, a categorical, a list/tuple, another
        ``Grouping``, or any other object that gets mapped over ``index``.

        Parameters
        ----------
        index : Index
            Index the grouper is resolved against. Used for level lookups and,
            for groupers that are not already array-like, as the object whose
            ``map`` is called with the grouper.
        grouper : optional
            The grouping key. It is first passed through ``_convert_grouper``.
        obj : FrameOrSeries or None
            Object being grouped. Used for the ``obj[name]`` lookup when no
            grouper is given, and handed to ``Grouper._get_grouper``.
        name : Hashable, optional
            Name of the grouping. When None it is filled in, where applicable,
            from ``grouper.name`` (Series/Index), from ``index.names[level]``,
            or from the ``result_index`` name of a resolved ``Grouper``.
        level : int or level name, optional
            Level of ``index`` to group by. A non-int value must be present in
            ``index.names`` and is translated to its position.
        sort : bool, default True
            Stored on the instance, passed to ``recode_for_groupby`` and used
            to decide whether observed category codes are sorted.
        observed : bool, default False
            For categorical groupers: build the group index from the codes
            present in the grouper (True) or from all categories (False).
        in_axis : bool, default False
            Stored on the instance; not otherwise used in this method.
        dropna : bool, default True
            Stored on the instance; not otherwise used in this method.

        Raises
        ------
        AssertionError
            If ``level`` is not an int and is not in ``index.names``, or if
            mapping the grouper over ``index`` yields a result without a
            length or with a length different from ``len(index)``.
        ValueError
            If a grouper that is not a Series, Index, ExtensionArray or
            ndarray reports an ``ndim`` other than 1.
        """
        self.name = name
        self.level = level
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self.index = index
        self.sort = sort
        self.obj = obj
        self.observed = observed
        self.in_axis = in_axis
        self.dropna = dropna

        # right place for this?
        # Inherit the name from a named Series/Index when none was given.
        if isinstance(grouper, (Series, Index)) and name is None:
            self.name = grouper.name

        # For a MultiIndex, group on its underlying values; this overrides
        # whatever _convert_grouper produced above.
        if isinstance(grouper, MultiIndex):
            self.grouper = grouper._values

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        if level is not None:
            # Translate a level name into its integer position.
            if not isinstance(level, int):
                if level not in index.names:
                    raise AssertionError(f"Level {level} not in index")
                level = index.names.index(level)

            if self.name is None:
                self.name = index.names[level]

            # The index supplies the grouper, codes and group index for the
            # requested level in one call.
            (
                self.grouper,
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(self.grouper, level)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouper, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            _, grouper, _ = self.grouper._get_grouper(
                # error: Value of type variable "FrameOrSeries" of "_get_grouper"
                # of "Grouper" cannot be "Optional[FrameOrSeries]"
                self.obj,  # type: ignore[type-var]
                validate=False,
            )
            if self.name is None:
                self.name = grouper.result_index.name
            # The object is read back from the Grouper itself; the third value
            # returned by _get_grouper is discarded above.
            self.obj = self.grouper.obj
            self.grouper = grouper._get_grouper()

        else:
            # No grouper given but a name is: group by that key of obj.
            if self.grouper is None and self.name is not None and self.obj is not None:
                self.grouper = self.obj[self.name]

            elif isinstance(self.grouper, (list, tuple)):
                self.grouper = com.asarray_tuplesafe(self.grouper)

            # a passed Categorical
            elif is_categorical_dtype(self.grouper):

                self.grouper, self.all_grouper = recode_for_groupby(
                    self.grouper, self.sort, observed)
                categories = self.grouper.categories

                # we make a CategoricalIndex out of the cat grouper
                # preserving the categories / ordered attributes
                self._codes = self.grouper.codes
                if observed:
                    # Only the codes that actually occur, minus -1
                    # (presumably the missing-value code — TODO confirm).
                    codes = algorithms.unique1d(self.grouper.codes)
                    codes = codes[codes != -1]
                    if sort or self.grouper.ordered:
                        codes = np.sort(codes)
                else:
                    # One group per category, whether or not it occurs.
                    codes = np.arange(len(categories))

                self._group_index = CategoricalIndex(
                    Categorical.from_codes(codes=codes,
                                           categories=categories,
                                           ordered=self.grouper.ordered),
                    name=self.name,
                )

            # NOTE: this is a separate if/elif from the chain above, so it
            # also sees the grouper produced by any of those branches.
            # we are done
            if isinstance(self.grouper, Grouping):
                self.grouper = self.grouper.grouper

            # no level passed
            elif not isinstance(self.grouper,
                                (Series, Index, ExtensionArray, np.ndarray)):
                # Objects without an ndim attribute are treated as 1-dimensional.
                if getattr(self.grouper, "ndim", 1) != 1:
                    t = self.name or str(type(self.grouper))
                    raise ValueError(f"Grouper for '{t}' not 1-dimensional")
                # Anything else is applied to the index via Index.map.
                self.grouper = self.index.map(self.grouper)
                # The mapped result must provide exactly one label per row.
                if not (hasattr(self.grouper, "__len__")
                        and len(self.grouper) == len(self.index)):
                    grper = pprint_thing(self.grouper)
                    errmsg = ("Grouper result violates len(labels) == "
                              f"len(data)\nresult: {grper}")
                    self.grouper = None  # Try for sanity
                    raise AssertionError(errmsg)

        # if we have a date/time-like grouper, make sure that we have
        # Timestamps like
        # (i.e. cast datetime64/timedelta64 groupers to nanosecond resolution)
        if getattr(self.grouper, "dtype", None) is not None:
            if is_datetime64_dtype(self.grouper):
                self.grouper = self.grouper.astype("datetime64[ns]")
            elif is_timedelta64_dtype(self.grouper):

                self.grouper = self.grouper.astype("timedelta64[ns]")
예제 #2
0
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: FrameOrSeries | None = None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
        dropna: bool = True,
    ):
        """
        Set up a single grouping from ``grouper`` relative to ``index``.

        The arguments are stored on the instance and ``self.grouper`` is then
        normalized according to what was passed: an index level, a ``Grouper``
        object, a categorical, or some other object that is mapped over the
        index.

        Parameters
        ----------
        index : Index
            Index the grouper is resolved against.
        grouper : optional
            The grouping key. Kept untouched in ``_orig_grouper`` and stored,
            after ``_convert_grouper``, in ``grouper``.
        obj : FrameOrSeries or None
            Object being grouped; replaced by the object returned from
            ``Grouper._get_grouper`` when a ``Grouper`` is passed.
        level : optional
            Stored on the instance. The level actually used comes from
            ``self._ilevel``, which is defined outside this method.
        sort : bool, default True
            Stored on the instance and passed to ``recode_for_groupby``.
        observed : bool, default False
            Stored on the instance and passed to ``recode_for_groupby``.
        in_axis : bool, default False
            Stored on the instance; not otherwise used in this method.
        dropna : bool, default True
            Stored on the instance; not otherwise used in this method.

        Raises
        ------
        ValueError
            If a grouper that is not a Series, Index, ExtensionArray or
            ndarray reports an ``ndim`` other than 1.
        AssertionError
            If mapping the grouper over the index yields a result without a
            length or with a length different from ``len(index)``.
        """
        # Arguments kept exactly as passed.
        self.index = index
        self.obj = obj
        self.level = level
        self.sort = sort
        self.observed = observed
        self.in_axis = in_axis
        self.dropna = dropna

        # Grouper state: the raw key, its converted form, and bookkeeping
        # that only the categorical branch below changes.
        self._orig_grouper = grouper
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self._passed_categorical = False

        # A single grouper can be many different things; which branch applies
        # depends partly on whether a level was requested.
        level_pos = self._ilevel

        if level_pos is not None:
            # Grouping by an index level: the index hands back the level
            # values (an Index), the codes and the group index.
            level_values, level_codes, level_uniques = index._get_grouper_for_level(
                self.grouper, level_pos
            )
            self.grouper = level_values
            self._codes = level_codes
            self._group_index = level_uniques

        elif isinstance(self.grouper, Grouper):
            # A Grouper-like was passed. Key/level were already disambiguated
            # into concrete values by this point, so validation is skipped.
            _, resolved, resolved_obj = self.grouper._get_grouper(
                # error: Value of type variable "FrameOrSeries" of "_get_grouper"
                # of "Grouper" cannot be "Optional[FrameOrSeries]"
                self.obj,  # type: ignore[type-var]
                validate=False,
            )
            self.obj = resolved_obj

            inner = resolved._get_grouper()
            # For a BinGrouper `inner is resolved`, so use it directly;
            # otherwise (ops.BaseGrouper) wrap in an Index rather than keep an
            # ndarray so that the name can be recovered.
            self.grouper = (
                inner
                if isinstance(resolved, ops.BinGrouper)
                else Index(inner, name=resolved.result_index.name)
            )

        elif is_categorical_dtype(self.grouper):
            # A Categorical was passed.
            self._passed_categorical = True
            recoded, all_recoded = recode_for_groupby(
                self.grouper, self.sort, observed
            )
            self.grouper, self.all_grouper = recoded, all_recoded

        elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)):
            # No level passed and not array-like: the grouper is applied to
            # the index. Objects without `ndim` count as 1-dimensional.
            if getattr(self.grouper, "ndim", 1) != 1:
                label = self.name or str(type(self.grouper))
                raise ValueError(f"Grouper for '{label}' not 1-dimensional")

            self.grouper = self.index.map(self.grouper)

            # The mapped result must provide exactly one label per row.
            if not hasattr(self.grouper, "__len__") or len(self.grouper) != len(
                self.index
            ):
                rendered = pprint_thing(self.grouper)
                message = (
                    f"Grouper result violates len(labels) == len(data)\nresult: {rendered}"
                )
                self.grouper = None  # Try for sanity
                raise AssertionError(message)

        # Date/time-like ndarray groupers are normalized so that we end up
        # with Timestamp-like (nanosecond) values.
        if isinstance(self.grouper, np.ndarray):
            self.grouper = sanitize_to_nanoseconds(self.grouper)