Ejemplo n.º 1
0
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: Optional[FrameOrSeries] = None,
        name=None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
    ):
        self.name = name
        self.level = level
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self.index = index
        self.sort = sort
        self.obj = obj
        self.observed = observed
        self.in_axis = in_axis

        # right place for this?
        if isinstance(grouper, (Series, Index)) and name is None:
            self.name = grouper.name

        if isinstance(grouper, MultiIndex):
            self.grouper = grouper.values

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        if level is not None:
            if not isinstance(level, int):
                if level not in index.names:
                    raise AssertionError(
                        "Level {level} not in index".format(level=level))
                level = index.names.index(level)

            if self.name is None:
                self.name = index.names[level]

            (
                self.grouper,
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(self.grouper, level)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouper, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
            if self.name is None:
                self.name = grouper.result_index.name
            self.obj = self.grouper.obj
            self.grouper = grouper._get_grouper()

        else:
            if self.grouper is None and self.name is not None and self.obj is not None:
                self.grouper = self.obj[self.name]

            elif isinstance(self.grouper, (list, tuple)):
                self.grouper = com.asarray_tuplesafe(self.grouper)

            # a passed Categorical
            elif is_categorical_dtype(self.grouper):

                self.grouper, self.all_grouper = recode_for_groupby(
                    self.grouper, self.sort, observed)
                categories = self.grouper.categories

                # we make a CategoricalIndex out of the cat grouper
                # preserving the categories / ordered attributes
                self._codes = self.grouper.codes
                if observed:
                    codes = algorithms.unique1d(self.grouper.codes)
                    codes = codes[codes != -1]
                    if sort or self.grouper.ordered:
                        codes = np.sort(codes)
                else:
                    codes = np.arange(len(categories))

                self._group_index = CategoricalIndex(
                    Categorical.from_codes(codes=codes,
                                           categories=categories,
                                           ordered=self.grouper.ordered),
                    name=self.name,
                )

            # we are done
            if isinstance(self.grouper, Grouping):
                self.grouper = self.grouper.grouper

            # no level passed
            elif not isinstance(self.grouper,
                                (Series, Index, ExtensionArray, np.ndarray)):
                if getattr(self.grouper, "ndim", 1) != 1:
                    t = self.name or str(type(self.grouper))
                    raise ValueError(
                        "Grouper for '{t}' not 1-dimensional".format(t=t))
                self.grouper = self.index.map(self.grouper)
                if not (hasattr(self.grouper, "__len__")
                        and len(self.grouper) == len(self.index)):
                    errmsg = ("Grouper result violates len(labels) == "
                              "len(data)\nresult: {grper}".format(
                                  grper=pprint_thing(self.grouper)))
                    self.grouper = None  # Try for sanity
                    raise AssertionError(errmsg)

        # if we have a date/time-like grouper, make sure that we have
        # Timestamps like
        if getattr(self.grouper, "dtype", None) is not None:
            if is_datetime64_dtype(self.grouper):
                self.grouper = self.grouper.astype("datetime64[ns]")
            elif is_timedelta64_dtype(self.grouper):

                self.grouper = self.grouper.astype("timedelta64[ns]")