def __init__(self, keys):
        """Record the grouping keys, compute their permutation, find segments.

        Parameters
        ----------
        keys : pdarray, Strings, or sequence of key arrays
            A single pdarray, a single Strings object, or a non-empty
            sequence of key arrays that all report the same ``size``.

        Raises
        ------
        ValueError
            If ``keys`` is an empty sequence, if the key arrays differ in
            size, or if per-locale grouping is requested for Strings.
        """
        # NOTE: per_locale is assigned False right here, so the per-locale
        # branches below are not taken as this method is written.
        self.per_locale = False
        self.keys = keys
        if isinstance(keys, pdarray):
            self.nkeys = 1
            self.size = keys.size
            if self.per_locale:
                self.permutation = local_argsort(keys)
            else:
                self.permutation = argsort(keys)
        elif isinstance(keys, Strings):
            self.nkeys = 1
            self.size = keys.size
            if self.per_locale:
                raise ValueError("per-locale groupby not supported on strings")
            else:
                self.permutation = keys.group()
        else:
            # Anything else is treated as a sequence of key arrays.
            self.nkeys = len(keys)
            if self.nkeys == 0:
                # Without this guard an empty sequence fails on keys[0]
                # with an uninformative IndexError.
                raise ValueError("At least one key array is required")
            self.size = keys[0].size
            if any(k.size != self.size for k in keys):
                raise ValueError("Key arrays must all be same size")
            self.permutation = coargsort(keys)

        # find_segments() takes no arguments, so it presumably works from
        # the attributes assigned above.
        self.find_segments()
Exemple #2
0
 def __init__(self,
              keys: Union[pdarray, Strings,
                          List[Union[pdarray, np.int64, Strings]]],
              assume_sorted: bool = False,
              hash_strings: bool = True) -> None:
     """Record the grouping keys, compute their permutation, find segments.

     Parameters
     ----------
     keys : pdarray, Strings, or list of key arrays
         A single pdarray, a single object exposing ``group()`` (the
         original comment names Strings and Categorical), or a non-empty
         list of key arrays that all report the same ``size``.
     assume_sorted : bool, default False
         When True, no sort is performed and ``arange(size)`` is used as
         the permutation.
     hash_strings : bool, default True
         Stored on the instance; not otherwise used in this method.

     Raises
     ------
     ValueError
         If ``keys`` is an empty list or the key arrays differ in size.
     """
     self.logger = getArkoudaLogger(name=self.__class__.__name__)
     self.assume_sorted = assume_sorted
     self.hash_strings = hash_strings
     self.keys = keys
     if isinstance(keys, pdarray):
         self.nkeys = 1
         self.size = keys.size
         if assume_sorted:
             self.permutation = arange(self.size)
         else:
             self.permutation = argsort(keys)
     # for Strings or Categorical
     elif hasattr(keys, "group"):
         self.nkeys = 1
         self.size = keys.size
         if assume_sorted:
             self.permutation = arange(self.size)
         else:
             self.permutation = keys.group()
     else:
         # Anything else is treated as a sequence of key arrays.
         self.nkeys = len(keys)
         if self.nkeys == 0:
             # Without this guard an empty list fails on keys[0] with an
             # uninformative IndexError.
             raise ValueError("At least one key array is required")
         self.size = keys[0].size
         if any(k.size != self.size for k in keys):
             raise ValueError("Key arrays must all be same size")
         if assume_sorted:
             self.permutation = arange(self.size)
         else:
             self.permutation = coargsort(keys)

     # find_segments() takes no arguments, so it presumably works from
     # the attributes assigned above.
     self.find_segments()
Exemple #3
0
    def __init__(self,
                 keys: Union[pdarray, Strings, 'Categorical',
                             List[Union[pdarray, np.int64, Strings]]],
                 assume_sorted: bool = False,
                 hash_strings: bool = True) -> None:
        """Record the grouping keys, compute their permutation, find segments.

        Parameters
        ----------
        keys : pdarray, Strings, Categorical, or list of key arrays
            A single int64 pdarray, a single object exposing ``group()``
            (Strings or Categorical), or a non-empty list of key arrays
            that all report the same ``size``.
        assume_sorted : bool, default False
            When True, no sort is performed and ``arange(size)`` is used
            as the permutation.
        hash_strings : bool, default True
            Stored on the instance; not otherwise used in this method.

        Raises
        ------
        TypeError
            If ``keys`` is a single pdarray whose dtype is not int64.
        ValueError
            If ``keys`` is an empty list or the key arrays differ in size.
        """
        # Imported here rather than at module level -- presumably to avoid
        # a circular import; confirm before moving it.
        from arkouda.categorical import Categorical
        self.logger = getArkoudaLogger(name=self.__class__.__name__)
        self.assume_sorted = assume_sorted
        self.hash_strings = hash_strings
        self.keys: Union[pdarray, Strings, Categorical]

        if isinstance(keys, pdarray):
            if keys.dtype != int64:
                raise TypeError(
                    'GroupBy only supports pdarrays with a dtype int64')
            self.keys = cast(pdarray, keys)
            self.nkeys = 1
            self.size = cast(int, keys.size)
            if assume_sorted:
                self.permutation = cast(pdarray, arange(self.size))
            else:
                self.permutation = cast(pdarray, argsort(keys))
        elif hasattr(keys, "group"):  # for Strings or Categorical
            self.nkeys = 1
            self.keys = cast(Union[Strings, Categorical], keys)
            self.size = cast(int, self.keys.size)  # type: ignore
            if assume_sorted:
                self.permutation = cast(pdarray, arange(self.size))
            else:
                self.permutation = cast(Union[Strings, Categorical],
                                        keys).group()
        else:
            # Anything else is treated as a sequence of key arrays.
            # NOTE(review): this cast labels a sequence of keys with the
            # single-key union type; it only affects static type checking.
            self.keys = cast(Union[pdarray, Strings, Categorical], keys)
            self.nkeys = len(keys)
            if self.nkeys == 0:
                # Without this guard an empty list fails on keys[0] with
                # an uninformative IndexError.
                raise ValueError("At least one key array is required")
            self.size = cast(int, keys[0].size)  # type: ignore
            if any(k.size != self.size for k in keys):
                raise ValueError("Key arrays must all be same size")
            if assume_sorted:
                self.permutation = cast(pdarray, arange(self.size))
            else:
                self.permutation = cast(
                    pdarray, coargsort(cast(Sequence[pdarray], keys)))

        # find_segments() takes no arguments, so it presumably works from
        # the attributes assigned above.
        self.find_segments()
Exemple #4
0
 def __init__(self, keys, assume_sorted=False, hash_strings=True):
     """Record the grouping keys, compute their permutation, find segments.

     Parameters
     ----------
     keys : pdarray, Strings, Categorical, or sequence of key arrays
         A single pdarray, a single object exposing ``group()`` (the
         original comment names Strings and Categorical), or a non-empty
         sequence of key arrays that all report the same ``size``.
     assume_sorted : bool, default False
         When True, no sort is performed and ``arange(size)`` is used as
         the permutation.
     hash_strings : bool, default True
         Stored on the instance; not otherwise used in this method.

     Raises
     ------
     ValueError
         If ``keys`` is an empty sequence, if the key arrays differ in
         size, or if per-locale grouping is requested for an object
         grouped via ``group()``.
     """
     self.assume_sorted = assume_sorted
     self.hash_strings = hash_strings
     # NOTE: per_locale is assigned False right here, so the per-locale
     # branches below are not taken as this method is written.
     self.per_locale = False
     self.keys = keys
     if isinstance(keys, pdarray):
         self.nkeys = 1
         self.size = keys.size
         if assume_sorted:
             self.permutation = arange(self.size)
         elif self.per_locale:
             self.permutation = local_argsort(keys)
         else:
             self.permutation = argsort(keys)
     # for Strings or Categorical
     elif hasattr(keys, "group"):
         self.nkeys = 1
         self.size = keys.size
         if assume_sorted:
             self.permutation = arange(self.size)
         elif self.per_locale:
             raise ValueError("per-locale groupby not supported on Strings or Categorical")
         else:
             self.permutation = keys.group()
     else:
         # Anything else is treated as a sequence of key arrays.
         self.nkeys = len(keys)
         if self.nkeys == 0:
             # Without this guard an empty sequence fails on keys[0] with
             # an uninformative IndexError.
             raise ValueError("At least one key array is required")
         self.size = keys[0].size
         if any(k.size != self.size for k in keys):
             raise ValueError("Key arrays must all be same size")
         if assume_sorted:
             self.permutation = arange(self.size)
         else:
             self.permutation = coargsort(keys)

     # find_segments() takes no arguments, so it presumably works from
     # the attributes assigned above.
     self.find_segments()