def __init__(self, keys):
    """
    Record the grouping keys and compute the permutation used for grouping.

    Parameters
    ----------
    keys : pdarray, Strings, or sequence of key arrays
        A single ``pdarray`` is ordered with ``argsort``, a single
        ``Strings`` with its own ``group()`` method, and anything else is
        treated as a sequence of equally sized arrays passed to
        ``coargsort``.

    Raises
    ------
    ValueError
        If the arrays in a key sequence differ in ``size``, or if a
        per-locale grouping is requested for ``Strings`` keys.

    Notes
    -----
    ``per_locale`` is set to ``False`` here, so the per-locale paths below
    are not taken by this constructor as written.
    """
    self.per_locale = False
    self.keys = keys

    if isinstance(keys, (pdarray, Strings)):
        # Single-key case: both supported types expose ``size`` directly.
        self.nkeys = 1
        self.size = keys.size
        if isinstance(keys, pdarray):
            sorter = local_argsort if self.per_locale else argsort
            self.permutation = sorter(keys)
        elif self.per_locale:
            raise ValueError("per-locale groupby not supported on strings")
        else:
            self.permutation = keys.group()
    else:
        # Multi-key case: the first array's size is the reference size.
        self.nkeys = len(keys)
        self.size = keys[0].size
        if any(key.size != self.size for key in keys):
            raise ValueError("Key arrays must all be same size")
        self.permutation = coargsort(keys)

    self.find_segments()
def __init__(
    self,
    keys: List[Union[pdarray, np.int64, Strings]],
    assume_sorted: bool = False,
    hash_strings: bool = True,
) -> None:
    """
    Record the grouping keys and compute the permutation used for grouping.

    Parameters
    ----------
    keys
        A single ``pdarray``, a single object exposing ``group()`` (meant
        for Strings or Categorical), or a sequence of equally sized key
        arrays.
    assume_sorted : bool
        When true, no sort is performed and ``arange(self.size)`` is
        stored as the permutation.
    hash_strings : bool
        Stored on the instance; not otherwise used in this constructor.

    Raises
    ------
    ValueError
        If the arrays in a key sequence differ in ``size``.
    """
    self.logger = getArkoudaLogger(name=self.__class__.__name__)
    self.assume_sorted = assume_sorted
    self.hash_strings = hash_strings
    self.keys = keys

    is_array = isinstance(keys, pdarray)
    if is_array or hasattr(keys, "group"):
        # Single key: a pdarray, or a Strings/Categorical-like object.
        self.nkeys = 1
        self.size = keys.size
        if assume_sorted:
            self.permutation = arange(self.size)
        elif is_array:
            self.permutation = argsort(keys)
        else:
            self.permutation = keys.group()
    else:
        # Several keys: every array must match the first one's size.
        self.nkeys = len(keys)
        self.size = keys[0].size
        if any(key.size != self.size for key in keys):
            raise ValueError("Key arrays must all be same size")
        self.permutation = (
            arange(self.size) if assume_sorted else coargsort(keys)
        )

    self.find_segments()
def __init__(self,
             keys: Union[pdarray, Strings, 'Categorical',
                         List[Union[pdarray, np.int64, Strings]]],
             assume_sorted: bool = False,
             hash_strings: bool = True) -> None:
    """
    Record the grouping keys and compute the permutation used for grouping.

    Parameters
    ----------
    keys
        A single int64 ``pdarray``, a single object exposing ``group()``
        (meant for Strings or Categorical), or a sequence of equally sized
        key arrays.
    assume_sorted : bool
        When true, no sort is performed and ``arange(self.size)`` is
        stored as the permutation.
    hash_strings : bool
        Stored on the instance; not otherwise used in this constructor.

    Raises
    ------
    TypeError
        If ``keys`` is a single ``pdarray`` whose dtype is not int64.
    ValueError
        If the arrays in a key sequence differ in ``size``.
    """
    # Function-level import; presumably avoids a circular import with
    # arkouda.categorical -- confirm before moving it to module scope.
    from arkouda.categorical import Categorical

    self.logger = getArkoudaLogger(name=self.__class__.__name__)
    self.assume_sorted = assume_sorted
    self.hash_strings = hash_strings
    self.keys: Union[pdarray, Strings, Categorical]

    if isinstance(keys, pdarray):
        if keys.dtype != int64:
            raise TypeError(
                'GroupBy only supports pdarrays with a dtype int64')
        self.keys = cast(pdarray, keys)
        self.nkeys = 1
        self.size = cast(int, keys.size)
        ordering = arange(self.size) if assume_sorted else argsort(keys)
        self.permutation = cast(pdarray, ordering)
    elif hasattr(keys, "group"):
        # Strings or Categorical: both provide their own group() method.
        self.nkeys = 1
        groupable = cast(Union[Strings, Categorical], keys)
        self.keys = groupable
        self.size = cast(int, groupable.size)  # type: ignore
        if assume_sorted:
            self.permutation = cast(pdarray, arange(self.size))
        else:
            self.permutation = groupable.group()
    else:
        # Sequence of keys: every array must match the first one's size.
        self.keys = cast(Union[pdarray, Strings, Categorical], keys)
        self.nkeys = len(keys)
        self.size = cast(int, keys[0].size)  # type: ignore
        for key in keys:
            if key.size != self.size:
                raise ValueError("Key arrays must all be same size")
        ordering = (
            arange(self.size)
            if assume_sorted
            else coargsort(cast(Sequence[pdarray], keys))
        )
        self.permutation = cast(pdarray, ordering)

    self.find_segments()
def __init__(self, keys, assume_sorted=False, hash_strings=True):
    """
    Record the grouping keys and compute the permutation used for grouping.

    Parameters
    ----------
    keys
        A single ``pdarray``, a single object exposing ``group()`` (meant
        for Strings or Categorical), or a sequence of equally sized key
        arrays.
    assume_sorted : bool
        When true, no sort is performed and ``arange(self.size)`` is
        stored as the permutation.
    hash_strings : bool
        Stored on the instance; not otherwise used in this constructor.

    Raises
    ------
    ValueError
        If the arrays in a key sequence differ in ``size``, or if a
        per-locale grouping is requested for a ``group()``-style key
        that is not assumed sorted.

    Notes
    -----
    ``per_locale`` is set to ``False`` here, so the per-locale paths below
    are not taken by this constructor as written.
    """
    self.assume_sorted = assume_sorted
    self.hash_strings = hash_strings
    self.per_locale = False
    self.keys = keys

    # Each branch records nkeys/size and, unless the caller vouches for
    # the ordering, computes the permutation with the matching routine.
    if isinstance(keys, pdarray):
        self.nkeys = 1
        self.size = keys.size
        if not assume_sorted:
            sorter = local_argsort if self.per_locale else argsort
            self.permutation = sorter(keys)
    elif hasattr(keys, "group"):
        # for Strings or Categorical
        self.nkeys = 1
        self.size = keys.size
        if not assume_sorted:
            if self.per_locale:
                raise ValueError("per-locale groupby not supported on Strings or Categorical")
            self.permutation = keys.group()
    else:
        self.nkeys = len(keys)
        self.size = keys[0].size
        if any(key.size != self.size for key in keys):
            raise ValueError("Key arrays must all be same size")
        if not assume_sorted:
            self.permutation = coargsort(keys)

    # Shared shortcut for all key kinds when the input is assumed sorted.
    if assume_sorted:
        self.permutation = arange(self.size)

    self.find_segments()