def group_index(self) -> Index:
    """
    Return the index holding one entry per group of this grouping.

    For a non-categorical grouper the cached ``self._group_index`` is
    returned, calling ``self._make_codes()`` first when the cache is still
    ``None``.  For a categorical grouper a ``CategoricalIndex`` is built from
    the grouper's codes, keeping its categories and ``ordered`` flag.

    Returns
    -------
    Index
    """
    if not self._passed_categorical:
        # Plain grouper: populate the cache on first use.
        if self._group_index is None:
            self._make_codes()
        assert self._group_index is not None
        return self._group_index

    # Categorical grouper: build a CategoricalIndex that preserves the
    # categories / ordered attributes of the grouper.
    grouper_cat = self.grouper
    all_categories = grouper_cat.categories
    if not self.observed:
        # Every declared category becomes a group, seen or not.
        level_codes = np.arange(len(all_categories))
    else:
        level_codes = algorithms.unique1d(grouper_cat.codes)
        # -1 marks missing values, which never form a group here.
        level_codes = level_codes[level_codes != -1]
        if self.sort or grouper_cat.ordered:
            level_codes = np.sort(level_codes)

    group_values = Categorical.from_codes(
        codes=level_codes,
        categories=all_categories,
        ordered=grouper_cat.ordered,
    )
    return CategoricalIndex(group_values, name=self.name)
def _create_from_codes(self, codes, categories=None, ordered=None, name=None): """ *this is an internal non-public method* create the correct categorical from codes Parameters ---------- codes : new codes categories : optional categories, defaults to existing ordered : optional ordered attribute, defaults to existing name : optional name attribute, defaults to existing Returns ------- CategoricalIndex """ from pandas.core.arrays import Categorical if categories is None: categories = self.categories if ordered is None: ordered = self.ordered if name is None: name = self.name cat = Categorical.from_codes(codes, categories=categories, ordered=self.ordered) return CategoricalIndex(cat, name=name)
def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]: if self._passed_categorical: # we make a CategoricalIndex out of the cat grouper # preserving the categories / ordered attributes cat = self.grouper categories = cat.categories if self.observed: ucodes = algorithms.unique1d(cat.codes) ucodes = ucodes[ucodes != -1] if self.sort or cat.ordered: ucodes = np.sort(ucodes) else: ucodes = np.arange(len(categories)) uniques = Categorical.from_codes( codes=ucodes, categories=categories, ordered=cat.ordered ) return cat.codes, uniques elif isinstance(self.grouper, ops.BaseGrouper): # we have a list of groupers codes = self.grouper.codes_info uniques = self.grouper.result_arraylike else: # GH35667, replace dropna=False with na_sentinel=None if not self.dropna: na_sentinel = None else: na_sentinel = -1 codes, uniques = algorithms.factorize( self.grouper, sort=self.sort, na_sentinel=na_sentinel ) return codes, uniques
def _codes_and_uniques( self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]: if self._passed_categorical: # we make a CategoricalIndex out of the cat grouper # preserving the categories / ordered attributes cat = self.grouping_vector categories = cat.categories if self._observed: ucodes = algorithms.unique1d(cat.codes) ucodes = ucodes[ucodes != -1] if self._sort or cat.ordered: ucodes = np.sort(ucodes) else: ucodes = np.arange(len(categories)) uniques = Categorical.from_codes(codes=ucodes, categories=categories, ordered=cat.ordered) return cat.codes, uniques elif isinstance(self.grouping_vector, ops.BaseGrouper): # we have a list of groupers codes = self.grouping_vector.codes_info # error: Incompatible types in assignment (expression has type "Union # [ExtensionArray, ndarray[Any, Any]]", variable has type "Categorical") uniques = ( self.grouping_vector.result_index. _values # type: ignore[assignment] ) else: # GH35667, replace dropna=False with use_na_sentinel=False # error: Incompatible types in assignment (expression has type "Union[ # ndarray[Any, Any], Index]", variable has type "Categorical") codes, uniques = algorithms.factorize( # type: ignore[assignment] self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna) return codes, uniques
def groups(self) -> dict[Hashable, np.ndarray]:
    """
    Group ``self.index`` by this grouping's keys.

    The keys are rebuilt as a ``Categorical`` from ``self.codes`` with
    ``self.group_index`` as categories, and handed to ``Index.groupby``.

    Returns
    -------
    dict
        Whatever ``self.index.groupby`` returns for those keys.
    """
    grouped_index = self.index
    keys = Categorical.from_codes(self.codes, self.group_index)
    return grouped_index.groupby(keys)
def __init__(
    self,
    index: Index,
    grouper=None,
    obj: FrameOrSeries | None = None,
    name: Hashable = None,
    level=None,
    sort: bool = True,
    observed: bool = False,
    in_axis: bool = False,
    dropna: bool = True,
):
    """
    Set up a single grouping from an index, a grouper-like and a level.

    Parameters
    ----------
    index : Index
        Stored as ``self.index``.  Used to resolve ``level`` and, when the
        grouper is not already array-like, mapped through the grouper.
    grouper : optional
        Converted with ``_convert_grouper(index, grouper)`` and then
        normalised by the branches below.
    obj : FrameOrSeries or None
        When no grouper is given but ``name`` is, ``obj[name]`` is used as
        the grouper.
    name : Hashable, optional
        Name of the grouping.  When ``None`` it is taken from a Series/Index
        grouper, from the index level, or from a Grouper's result index.
    level : int or level name, optional
        Level of ``index`` to group by; names are looked up in
        ``index.names``.
    sort : bool, default True
        Forwarded to ``recode_for_groupby``; also decides whether observed
        category codes are sorted.
    observed : bool, default False
        For a categorical grouper, whether only the codes actually present
        are used for the group index.
    in_axis : bool, default False
        Stored as ``self.in_axis``; not otherwise used in this method.
    dropna : bool, default True
        Stored as ``self.dropna``; not otherwise used in this method.

    Raises
    ------
    AssertionError
        If ``level`` is a name that is not in ``index.names``, or if mapping
        the index through the grouper yields something without ``__len__``
        or with a length different from ``len(index)``.
    ValueError
        If a grouper that is not a Series, Index, ExtensionArray or ndarray
        reports an ``ndim`` other than 1.
    """
    self.name = name
    self.level = level
    self.grouper = _convert_grouper(index, grouper)
    self.all_grouper = None
    self.index = index
    self.sort = sort
    self.obj = obj
    self.observed = observed
    self.in_axis = in_axis
    self.dropna = dropna

    # right place for this?
    # NOTE: this checks the *argument* ``grouper``, not the converted
    # ``self.grouper``.
    if isinstance(grouper, (Series, Index)) and name is None:
        self.name = grouper.name

    if isinstance(grouper, MultiIndex):
        self.grouper = grouper._values

    # we have a single grouper which may be a myriad of things,
    # some of which are dependent on the passing in level

    if level is not None:
        # A level name is translated to its integer position first.
        if not isinstance(level, int):
            if level not in index.names:
                raise AssertionError(f"Level {level} not in index")
            level = index.names.index(level)

        if self.name is None:
            self.name = index.names[level]

        # The index supplies grouper, codes and group index in one go.
        (
            self.grouper,
            self._codes,
            self._group_index,
        ) = index._get_grouper_for_level(self.grouper, level)

    # a passed Grouper like, directly get the grouper in the same way
    # as single grouper groupby, use the group_info to get codes
    elif isinstance(self.grouper, Grouper):
        # get the new grouper; we already have disambiguated
        # what key/level refer to exactly, don't need to
        # check again as we have by this point converted these
        # to an actual value (rather than a pd.Grouper)
        _, grouper, _ = self.grouper._get_grouper(
            # error: Value of type variable "FrameOrSeries" of "_get_grouper"
            # of "Grouper" cannot be "Optional[FrameOrSeries]"
            self.obj,  # type: ignore[type-var]
            validate=False,
        )
        if self.name is None:
            self.name = grouper.result_index.name
        # ``self.obj`` must be read from the Grouper before ``self.grouper``
        # is overwritten on the next line.
        self.obj = self.grouper.obj
        self.grouper = grouper._get_grouper()

    else:
        # First chain: normalise the raw grouper value.
        if self.grouper is None and self.name is not None and self.obj is not None:
            self.grouper = self.obj[self.name]

        elif isinstance(self.grouper, (list, tuple)):
            self.grouper = com.asarray_tuplesafe(self.grouper)

        # a passed Categorical
        elif is_categorical_dtype(self.grouper):

            self.grouper, self.all_grouper = recode_for_groupby(
                self.grouper, self.sort, observed)
            categories = self.grouper.categories

            # we make a CategoricalIndex out of the cat grouper
            # preserving the categories / ordered attributes
            self._codes = self.grouper.codes
            if observed:
                codes = algorithms.unique1d(self.grouper.codes)
                # -1 is filtered out so it never becomes a group
                codes = codes[codes != -1]
                if sort or self.grouper.ordered:
                    codes = np.sort(codes)
            else:
                # every category becomes a group
                codes = np.arange(len(categories))

            self._group_index = CategoricalIndex(
                Categorical.from_codes(codes=codes, categories=categories, ordered=self.grouper.ordered),
                name=self.name,
            )

        # Second, independent chain: runs after the normalisation above.
        # we are done
        if isinstance(self.grouper, Grouping):
            self.grouper = self.grouper.grouper

        # no level passed
        elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)):
            if getattr(self.grouper, "ndim", 1) != 1:
                t = self.name or str(type(self.grouper))
                raise ValueError(f"Grouper for '{t}' not 1-dimensional")
            # Anything else is applied to the index labels via ``Index.map``.
            self.grouper = self.index.map(self.grouper)
            if not (hasattr(self.grouper, "__len__") and len(self.grouper) == len(self.index)):
                # Format the message before the grouper is reset to None.
                grper = pprint_thing(self.grouper)
                errmsg = ("Grouper result violates len(labels) == "
                          f"len(data)\nresult: {grper}")
                self.grouper = None  # Try for sanity
                raise AssertionError(errmsg)

    # if we have a date/time-like grouper, make sure that we have
    # Timestamps like
    if getattr(self.grouper, "dtype", None) is not None:
        if is_datetime64_dtype(self.grouper):
            self.grouper = self.grouper.astype("datetime64[ns]")
        elif is_timedelta64_dtype(self.grouper):
            self.grouper = self.grouper.astype("timedelta64[ns]")
def groups(self):
    """
    Group ``self.index`` by this grouping's keys.

    The keys are rebuilt as a ``Categorical`` from ``self.labels`` with
    ``self.group_index`` as categories, and handed to ``Index.groupby``.

    Returns
    -------
    The result of ``self.index.groupby`` for those keys.
    """
    grouped_index = self.index
    keys = Categorical.from_codes(self.labels, self.group_index)
    return grouped_index.groupby(keys)
def __init__(self, index, grouper=None, obj=None, name=None, level=None,
             sort=True, observed=False, in_axis=False):
    """
    Set up a single grouping from an index, a grouper-like and a level.

    Parameters
    ----------
    index : Index
        Stored as ``self.index``.  Used to resolve ``level`` and, when the
        grouper is not already array-like, mapped through the grouper.
    grouper : optional
        Converted with ``_convert_grouper(index, grouper)`` and then
        normalised by the branches below.
    obj : optional
        When no grouper is given but ``name`` is, ``obj[name]`` is used as
        the grouper.  NOTE(review): this branch does not check that ``obj``
        is not None before indexing it.
    name : optional
        Name of the grouping.  When ``None`` it is taken from a Series/Index
        grouper, from the index level, or from a Grouper's result index.
    level : int or level name, optional
        Level of ``index`` to group by; names are looked up in
        ``index.names``.
    sort : bool, default True
        Forwarded to ``recode_for_groupby``; also decides whether observed
        category codes are sorted.
    observed : bool, default False
        For a categorical grouper, whether only the codes actually present
        are used for the group index.
    in_axis : bool, default False
        Stored as ``self.in_axis``; not otherwise used in this method.

    Raises
    ------
    AssertionError
        If ``level`` is a name that is not in ``index.names``, or if mapping
        the index through the grouper yields something without ``__len__``
        or with a length different from ``len(index)``.
    ValueError
        If a grouper that is not a Series, Index, ExtensionArray or ndarray
        reports an ``ndim`` other than 1.
    """
    self.name = name
    self.level = level
    self.grouper = _convert_grouper(index, grouper)
    self.all_grouper = None
    self.index = index
    self.sort = sort
    self.obj = obj
    self.observed = observed
    self.in_axis = in_axis

    # right place for this?
    # NOTE: this checks the *argument* ``grouper``, not the converted
    # ``self.grouper``.
    if isinstance(grouper, (Series, Index)) and name is None:
        self.name = grouper.name

    if isinstance(grouper, MultiIndex):
        self.grouper = grouper.values

    # we have a single grouper which may be a myriad of things,
    # some of which are dependent on the passing in level

    if level is not None:
        # A level name is translated to its integer position first.
        if not isinstance(level, int):
            if level not in index.names:
                raise AssertionError('Level {} not in index'.format(level))
            level = index.names.index(level)

        if self.name is None:
            self.name = index.names[level]

        # The index supplies grouper, labels and group index in one go.
        self.grouper, self._labels, self._group_index = \
            index._get_grouper_for_level(self.grouper, level)

    # a passed Grouper like, directly get the grouper in the same way
    # as single grouper groupby, use the group_info to get labels
    elif isinstance(self.grouper, Grouper):
        # get the new grouper; we already have disambiguated
        # what key/level refer to exactly, don't need to
        # check again as we have by this point converted these
        # to an actual value (rather than a pd.Grouper)
        _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
        if self.name is None:
            self.name = grouper.result_index.name
        # ``self.obj`` must be read from the Grouper before ``self.grouper``
        # is overwritten on the next line.
        self.obj = self.grouper.obj
        self.grouper = grouper._get_grouper()

    else:
        # First chain: normalise the raw grouper value.
        if self.grouper is None and self.name is not None:
            self.grouper = self.obj[self.name]

        elif isinstance(self.grouper, (list, tuple)):
            self.grouper = com.asarray_tuplesafe(self.grouper)

        # a passed Categorical
        elif is_categorical_dtype(self.grouper):

            from pandas.core.groupby.categorical import recode_for_groupby
            self.grouper, self.all_grouper = recode_for_groupby(
                self.grouper, self.sort, observed)
            categories = self.grouper.categories

            # we make a CategoricalIndex out of the cat grouper
            # preserving the categories / ordered attributes
            self._labels = self.grouper.codes
            if observed:
                codes = algorithms.unique1d(self.grouper.codes)
                # -1 is filtered out so it never becomes a group
                codes = codes[codes != -1]
                if sort or self.grouper.ordered:
                    codes = np.sort(codes)
            else:
                # every category becomes a group
                codes = np.arange(len(categories))

            # NOTE: no ``name`` is passed to the CategoricalIndex here.
            self._group_index = CategoricalIndex(
                Categorical.from_codes(
                    codes=codes, categories=categories,
                    ordered=self.grouper.ordered))

        # Second, independent chain: runs after the normalisation above.
        # we are done
        if isinstance(self.grouper, Grouping):
            self.grouper = self.grouper.grouper

        # no level passed
        elif not isinstance(self.grouper,
                            (Series, Index, ExtensionArray, np.ndarray)):
            if getattr(self.grouper, 'ndim', 1) != 1:
                t = self.name or str(type(self.grouper))
                raise ValueError(
                    "Grouper for '{}' not 1-dimensional".format(t))
            # Anything else is applied to the index labels via ``Index.map``.
            self.grouper = self.index.map(self.grouper)
            if not (hasattr(self.grouper, "__len__") and
                    len(self.grouper) == len(self.index)):
                # Format the message before the grouper is reset to None.
                errmsg = ('Grouper result violates len(labels) == '
                          'len(data)\nresult: %s' % pprint_thing(self.grouper))
                self.grouper = None  # Try for sanity
                raise AssertionError(errmsg)

    # if we have a date/time-like grouper, make sure that we have
    # Timestamps like
    if getattr(self.grouper, 'dtype', None) is not None:
        if is_datetime64_dtype(self.grouper):
            from pandas import to_datetime
            self.grouper = to_datetime(self.grouper)
        elif is_timedelta64_dtype(self.grouper):
            from pandas import to_timedelta
            self.grouper = to_timedelta(self.grouper)
def groups(self):
    """
    Group ``self.index`` by this grouping's keys.

    A ``Categorical`` is built from ``self.labels`` (as codes) and
    ``self.group_index`` (as categories) and passed to ``Index.groupby``.

    Returns
    -------
    The result of ``self.index.groupby`` for those keys.
    """
    target = self.index
    group_keys = Categorical.from_codes(self.labels, self.group_index)
    return target.groupby(group_keys)
def groups(self) -> dict:
    """
    Group ``self.index`` by this grouping's keys.

    A ``Categorical`` is built from ``self.codes`` and ``self.group_index``
    (as categories) and passed to ``Index.groupby``.

    Returns
    -------
    dict
        Whatever ``self.index.groupby`` returns for those keys.
    """
    target = self.index
    group_keys = Categorical.from_codes(self.codes, self.group_index)
    return target.groupby(group_keys)