예제 #1
0
 def __init__(self, values, **kwargs):
     """
     Build a Categorical from raw values or from precomputed pieces.

     Parameters
     ----------
     values
         A Strings object to categorize. Ignored when both ``codes`` and
         ``categories`` are passed as keyword arguments.
     **kwargs
         ``codes`` and ``categories`` (both required together) select the
         precomputed path used by Categorical.from_codes(). On that path
         ``permutation`` and ``segments`` are stored only if supplied.

     Raises
     ------
     ValueError
         If the precomputed path is not selected and ``values`` is not a
         Strings object.
     """
     prebuilt = 'codes' in kwargs and 'categories' in kwargs
     if not prebuilt:
         # Typical construction: derive everything from the values
         if not isinstance(values, Strings):
             raise ValueError("Categorical: inputs other than Strings not yet supported")
         grouping = GroupBy(values)
         labels = grouping.unique_keys
         # Fill the codes by writing the broadcast label indices through
         # the grouping permutation
         assigned = zeros(values.size, dtype=int64)
         assigned[grouping.permutation] = grouping.broadcast(arange(labels.size))
         self.categories = labels
         self.codes = assigned
         self.permutation = grouping.permutation
         self.segments = grouping.segments
     else:
         # Construction via Categorical.from_codes(); values is not used
         self.codes = kwargs['codes']
         self.categories = kwargs['categories']
         # Optional pieces are only attached when the caller provided them
         for optional in ('permutation', 'segments'):
             if optional in kwargs:
                 setattr(self, optional, kwargs[optional])
     # Derived attributes, set on both paths
     self.nlevels = self.categories.size
     self.size = self.codes.size
     self.ndim = self.codes.ndim
     self.shape = self.codes.shape
예제 #2
0
 def __init__(self, values, **kwargs) -> None:
     """
     Build a Categorical from raw values or from precomputed pieces.

     Parameters
     ----------
     values
         A Strings object to categorize. Ignored when both ``codes`` and
         ``categories`` are passed as keyword arguments.
     **kwargs
         ``codes`` and ``categories`` (both required together) select the
         precomputed path used by Categorical.from_codes(). On that path
         ``permutation`` and ``segments`` are stored only if supplied.

     Raises
     ------
     ValueError
         If the precomputed path is not selected and ``values`` is not a
         Strings object.
     """
     self.logger = getArkoudaLogger(name=__class__.__name__)  # type: ignore
     prebuilt = 'codes' in kwargs and 'categories' in kwargs
     if not prebuilt:
         # Typical construction: derive everything from the values
         if not isinstance(values, Strings):
             raise ValueError("Categorical: inputs other than "
                              "Strings not yet supported")
         grouping = GroupBy(values)
         labels = grouping.unique_keys
         self.categories = labels
         # One code per element, produced by the permuting broadcast
         self.codes = grouping.broadcast(arange(labels.size), permute=True)
         self.permutation = cast(pdarray, grouping.permutation)
         self.segments = grouping.segments
     else:
         # Construction via Categorical.from_codes(); values is not used
         self.codes = kwargs['codes']
         self.categories = kwargs['categories']
         # Optional pieces are only attached when the caller provided them
         if 'permutation' in kwargs:
             self.permutation = cast(pdarray, kwargs['permutation'])
         if 'segments' in kwargs:
             self.segments = cast(pdarray, kwargs['segments'])
     # Derived attributes, set on both paths
     self.size: int_scalars = self.codes.size
     self.nlevels = self.categories.size
     self.ndim = self.codes.ndim
     self.shape = self.codes.shape
     # No name is assigned at construction time
     self.name: Optional[str] = None
예제 #3
0
 def reset_categories(self):
     """
     Rebuild the category labels so that labels no longer referenced by any
     code are dropped. Slicing or indexing a Categorical can leave it with
     only a subset of its original categories in use; pruning the unused
     ones can make subsequent operations faster.

     Returns a new Categorical built via Categorical.from_codes().
     """
     grouped = GroupBy(self.codes)
     perm = grouped.permutation
     # Keep only the labels whose codes actually occur
     kept_labels = self.categories[grouped.unique_keys]
     # Renumber: write the broadcast label indices through the permutation
     fresh_codes = zeros(self.codes.size, int64)
     fresh_codes[perm] = grouped.broadcast(arange(kept_labels.size))
     return Categorical.from_codes(
         fresh_codes,
         kept_labels,
         permutation=perm,
         segments=grouped.segments,
     )
예제 #4
0
 def reset_categories(self) -> Categorical:
     """
     Rebuild the category labels so that labels no longer referenced by any
     code are dropped. Slicing or indexing a Categorical can leave it with
     only a subset of its original categories in use; pruning the unused
     ones can make subsequent operations faster.

     Returns
     -------
     Categorical
         A Categorical object generated from the current instance
     """
     grouped = GroupBy(self.codes)
     # Keep only the labels whose codes actually occur
     kept_labels = self.categories[grouped.unique_keys]
     # Renumber the codes against the pruned label set
     fresh_codes = grouped.broadcast(arange(kept_labels.size), permute=True)
     extras = {'permutation': grouped.permutation,
               'segments': grouped.segments}
     return Categorical.from_codes(fresh_codes, kept_labels, **extras)
예제 #5
0
    def concatenate(self,
                    others: Sequence[Categorical],
                    ordered: bool = True) -> Categorical:
        """
        Merge this Categorical with other Categorical objects in the array,
        concatenating the arrays and synchronizing the categories.

        Parameters
        ----------
        others : Sequence[Categorical]
            The Categorical arrays to concatenate and merge with this one.
            A single Categorical is also accepted and treated as a
            one-element sequence.
        ordered : bool
            If True (default), the arrays will be appended in the
            order given. If False, array data may be interleaved
            in blocks, which can greatly improve performance but
            results in non-deterministic ordering of elements.

        Returns
        -------
        Categorical
            The merged Categorical object. If ``others`` is empty, this
            instance itself is returned.

        Raises
        ------
        TypeError
            Raised if any others array objects are not Categorical objects

        Notes
        -----
        This operation can be expensive -- slower than concatenating Strings.
        """
        if isinstance(others, Categorical):
            others = [others]
        elif len(others) < 1:
            return self
        samecategories = True
        for c in others:
            # Every operand is type-checked, even after a mismatch is known
            if not isinstance(c, Categorical):
                raise TypeError(("Categorical: can only merge/concatenate " +
                                 "with other Categoricals"))
            # Once one mismatch has been found the answer cannot change, so
            # skip the remaining element-wise category comparisons.
            if samecategories and (
                    self.categories.size != c.categories.size
                    or not (self.categories == c.categories).all()):
                samecategories = False
        all_codes = [self.codes] + [o.codes for o in others]
        if samecategories:
            # Identical categories: codes are directly comparable, so they
            # can be concatenated without any remapping.
            newvals = cast(pdarray, concatenate(all_codes, ordered=ordered))
            return Categorical.from_codes(newvals, self.categories)
        all_categories = [self.categories] + [o.categories for o in others]
        # The category arrays must be concatenated in the given order
        # because the offsets computed below index into that layout.
        g = GroupBy(concatenate(all_categories, ordered=True))
        newidx = g.unique_keys
        # For each slot of the concatenated categories, its index in newidx
        wherediditgo = g.broadcast(arange(newidx.size), permute=True)
        idxsizes = np.array([cats.size for cats in all_categories])
        # Start position of each operand's categories in the concatenation
        idxoffsets = np.cumsum(idxsizes) - idxsizes
        # Shift each operand's codes into the concatenated-category index
        # space, then translate them to the merged category indices.
        oldvals = concatenate(
            [codes + off for codes, off in zip(all_codes, idxoffsets)],
            ordered=ordered)
        newvals = wherediditgo[oldvals]
        return Categorical.from_codes(newvals, newidx)