def __init__(self, values, **kwargs):
    """
    Build a Categorical from an array of Strings, or from pre-computed
    codes and categories.

    Parameters
    ----------
    values : Strings
        Values to convert to categories. Ignored when both ``codes``
        and ``categories`` are passed as keyword arguments.
    **kwargs
        ``codes`` and ``categories`` must both be present to select the
        from-codes path. ``permutation`` and ``segments`` are optional
        on that path and are stored only when supplied.

    Raises
    ------
    ValueError
        Raised if values is not a Strings object and codes/categories
        were not both supplied
    """
    if 'codes' not in kwargs or 'categories' not in kwargs:
        # Typical initialization, called with values
        if not isinstance(values, Strings):
            raise ValueError("Categorical: inputs other than Strings not yet supported")
        grouping = GroupBy(values)
        self.categories = grouping.unique_keys
        # Number the unique keys, spread each number over its group, and
        # write the result through the grouping permutation
        self.codes = zeros(values.size, dtype=int64)
        self.codes[grouping.permutation] = grouping.broadcast(arange(self.categories.size))
        self.permutation = grouping.permutation
        self.segments = grouping.segments
    else:
        # This initialization is called by Categorical.from_codes()
        # The values arg is ignored
        self.codes = kwargs['codes']
        self.categories = kwargs['categories']
        # Grouping metadata is optional; the attributes stay unset when
        # the caller did not provide them
        for optional in ('permutation', 'segments'):
            if optional in kwargs:
                setattr(self, optional, kwargs[optional])
    # Always set these values
    self.size = self.codes.size
    self.nlevels = self.categories.size
    self.ndim = self.codes.ndim
    self.shape = self.codes.shape
def __init__(self, values, **kwargs) -> None:
    """
    Build a Categorical from an array of Strings, or from pre-computed
    codes and categories.

    Parameters
    ----------
    values : Strings
        Values to convert to categories. Ignored when both ``codes``
        and ``categories`` are passed as keyword arguments.
    **kwargs
        ``codes`` and ``categories`` must both be present to select the
        from-codes path. ``permutation`` and ``segments`` are optional
        on that path and are stored only when supplied.

    Raises
    ------
    ValueError
        Raised if values is not a Strings object and codes/categories
        were not both supplied
    """
    self.logger = getArkoudaLogger(name=__class__.__name__)  # type: ignore
    from_codes = all(key in kwargs for key in ('codes', 'categories'))
    if from_codes:
        # This initialization is called by Categorical.from_codes()
        # The values arg is ignored
        self.codes = kwargs['codes']
        self.categories = kwargs['categories']
        # Grouping metadata is optional; the attributes stay unset when
        # the caller did not provide them
        if 'permutation' in kwargs:
            self.permutation = cast(pdarray, kwargs['permutation'])
        if 'segments' in kwargs:
            self.segments = cast(pdarray, kwargs['segments'])
    else:
        # Typical initialization, called with values
        if not isinstance(values, Strings):
            message = ("Categorical: inputs other than "
                       "Strings not yet supported")
            raise ValueError(message)
        grouping = GroupBy(values)
        self.categories = grouping.unique_keys
        # Number the unique keys and broadcast each number over its group;
        # permute=True is requested so the codes line up with values
        # (NOTE(review): confirm against GroupBy.broadcast)
        category_ids = arange(self.categories.size)
        self.codes = grouping.broadcast(category_ids, permute=True)
        self.permutation = cast(pdarray, grouping.permutation)
        self.segments = grouping.segments
    # Always set these values
    self.size: int_scalars = self.codes.size
    self.nlevels = self.categories.size
    self.ndim = self.codes.ndim
    self.shape = self.codes.shape
    self.name: Optional[str] = None
def reset_categories(self):
    """
    Recompute the category labels, discarding any unused labels.

    Slicing or indexing a Categorical can leave it holding only a
    subset of its original categories. Dropping the labels that no
    longer occur can speed up other operations.

    Returns
    -------
    Categorical
        A new Categorical built from the codes of this instance and
        only the labels those codes reference
    """
    grouping = GroupBy(self.codes)
    # Labels that are still referenced by at least one code
    used_labels = self.categories[grouping.unique_keys]
    # Renumber the surviving labels, spread each number over its group,
    # and write the result through the grouping permutation
    fresh_codes = zeros(self.codes.size, int64)
    fresh_codes[grouping.permutation] = grouping.broadcast(arange(used_labels.size))
    # Hand the grouping over so the new object can store it
    grouping_info = {
        'permutation': grouping.permutation,
        'segments': grouping.segments,
    }
    return Categorical.from_codes(fresh_codes, used_labels, **grouping_info)
def reset_categories(self) -> Categorical:
    """
    Recompute the category labels, discarding any unused labels.

    Slicing or indexing a Categorical can leave it holding only a
    subset of its original categories. Dropping the labels that no
    longer occur can speed up other operations.

    Returns
    -------
    Categorical
        A Categorical object generated from the current instance
    """
    grouping = GroupBy(self.codes)
    # Labels that are still referenced by at least one code
    used_labels = self.categories[grouping.unique_keys]
    # Renumber the surviving labels and broadcast each number over its
    # group; permute=True is requested so the new codes line up with
    # self.codes (NOTE(review): confirm against GroupBy.broadcast)
    fresh_codes = grouping.broadcast(arange(used_labels.size), permute=True)
    # Hand the grouping over so the new object can store it
    grouping_info = {
        'permutation': grouping.permutation,
        'segments': grouping.segments,
    }
    return Categorical.from_codes(fresh_codes, used_labels, **grouping_info)
def concatenate(self, others: Sequence[Categorical], ordered: bool = True) -> Categorical:
    """
    Merge this Categorical with other Categorical objects in the array,
    concatenating the arrays and synchronizing the categories.

    Parameters
    ----------
    others : Sequence[Categorical]
        The Categorical arrays to concatenate and merge with this one.
        A single Categorical is also accepted and is treated as a
        one-element sequence.
    ordered : bool
        If True (default), the arrays will be appended in the
        order given. If False, array data may be interleaved
        in blocks, which can greatly improve performance but
        results in non-deterministic ordering of elements.

    Returns
    -------
    Categorical
        The merged Categorical object. If others is an empty sequence,
        this instance itself is returned.

    Raises
    ------
    TypeError
        Raised if any others array objects are not Categorical objects

    Notes
    -----
    This operation can be expensive -- slower than concatenating Strings.
    The result is built with Categorical.from_codes without passing
    permutation or segments.
    """
    if isinstance(others, Categorical):
        others = [others]
    elif len(others) < 1:
        return self
    samecategories = True
    for c in others:
        if not isinstance(c, Categorical):
            raise TypeError(("Categorical: can only merge/concatenate " +
                             "with other Categoricals"))
        # Once one mismatch is known the outcome cannot change, so skip the
        # remaining element-wise category comparisons. The type check above
        # still runs for every input.
        if samecategories and (
                self.categories.size != c.categories.size
                or not (self.categories == c.categories).all()):
            samecategories = False
    if samecategories:
        # Identical category arrays: the codes mean the same thing in every
        # input, so they can be concatenated directly.
        newvals = cast(
            pdarray,
            concatenate([self.codes] + [o.codes for o in others], ordered=ordered))
        return Categorical.from_codes(newvals, self.categories)
    else:
        # Categories are always concatenated in order, regardless of the
        # `ordered` argument, because the offsets computed below rely on
        # each input's categories sitting in one contiguous run.
        g = GroupBy(concatenate([self.categories] +
                                [o.categories for o in others],
                                ordered=True))
        newidx = g.unique_keys
        # For each position in the concatenated category array, the index of
        # its entry in newidx (presumably; depends on GroupBy.broadcast with
        # permute=True).
        wherediditgo = g.broadcast(arange(newidx.size), permute=True)
        idxsizes = np.array([self.categories.size] +
                            [o.categories.size for o in others])
        # Exclusive prefix sum: where each input's categories start within
        # the concatenated category array.
        idxoffsets = np.cumsum(idxsizes) - idxsizes
        # Shift every input's codes so they index the concatenated category
        # array, then translate them to indices into the merged categories.
        oldvals = concatenate([c + off for c, off in
                               zip([self.codes] + [o.codes for o in others],
                                   idxoffsets)],
                              ordered=ordered)
        newvals = wherediditgo[oldvals]
        return Categorical.from_codes(newvals, newidx)