def binop(self, other, op):
    """
    Executes the requested binop on this Categorical instance and the
    other operand, returning the results within a pdarray object.

    Parameters
    ----------
    other : Categorical or str
        The right-hand operand: another Categorical, or a string scalar
        that is compared against the categories index
    op : str
        name of the binary operation to be performed

    Returns
    -------
    pdarray
        encapsulating the results of the requested binop

    Raises
    ------
    NotImplementedError
        Raised if the op is not in the self.BinOps set, or if the operand
        combination is not yet supported
    ValueError
        Raised if the sizes of the two Categorical instances don't match
    """
    if op not in self.BinOps:
        raise NotImplementedError("Categorical: unsupported operator: {}".format(op))
    if np.isscalar(other) and resolve_scalar_dtype(other) == "str":
        # Apply the op to the (deduplicated) categories index once, then
        # gather the per-element answers through the codes
        idxresult = self.categories.binop(other, op)
        return idxresult[self.codes]
    if isinstance(other, Categorical):
        # Only array-like operands have a size; checking inside this branch
        # avoids an AttributeError (instead of the NotImplementedError
        # below) when `other` is an unsupported type with no .size
        if self.size != other.size:
            raise ValueError("Categorical {}: size mismatch {} {}".format(op, self.size, other.size))
        if self.categories.name == other.categories.name:
            # Identical category index, so codes are directly comparable
            return self.codes.binop(other.codes, op)
        else:
            raise NotImplementedError("Operations between Categoricals with different indices not yet implemented")
    else:
        raise NotImplementedError("Operations between Categorical and non-Categorical not yet implemented. Consider converting operands to Categorical.")
def _binop(self, other: Union[Strings, np.str_, str], op: str) -> pdarray:
    """
    Executes the requested binop on this Strings instance and the parameter
    Strings object and returns the results within a pdarray object.

    Parameters
    ----------
    other : Strings, np.str_, or str
        the other object is a Strings object
    op : str
        name of the binary operation to be performed

    Returns
    -------
    pdarray
        encapsulating the results of the requested binop

    Raises
    ------
    ValueError
        Raised if (1) the op is not in the self.BinOps set, or (2) if the
        sizes of this and the other instance don't match, or (3) the other
        object is not a Strings object
    RuntimeError
        Raised if a server-side error is thrown while executing the
        binary operation
    """
    if op not in self.BinOps:
        raise ValueError("Strings: unsupported operator: {}".format(op))
    if isinstance(other, Strings):
        if self.size != other.size:
            raise ValueError("Strings: size mismatch {} {}".\
                             format(self.size, other.size))
        # vector-vector flavor: both operands are segmented strings
        msg_cmd = "segmentedBinopvv"
        fields = [op, self.objtype, self.offsets.name, self.bytes.name,
                  other.objtype, other.offsets.name, other.bytes.name]
    elif resolve_scalar_dtype(other) == 'str':
        # vector-scalar flavor: the scalar operand is shipped as a JSON list
        msg_cmd = "segmentedBinopvs"
        fields = [op, self.objtype, self.offsets.name, self.bytes.name,
                  self.objtype, json.dumps([other])]
    else:
        raise ValueError("Strings: {} not supported between Strings and {}"\
                         .format(op, other.__class__.__name__))
    msg_args = " ".join("{}".format(field) for field in fields)
    return create_pdarray(generic_msg(cmd=msg_cmd, args=msg_args))
def __getitem__(self, key):
    """
    Returns the string at an integer index, or a new Strings object for a
    slice or pdarray (int or bool) index.

    Parameters
    ----------
    key : int, slice, or pdarray
        The index or indices to gather

    Returns
    -------
    str or Strings
        A single string for an integer key, otherwise a Strings object

    Raises
    ------
    IndexError
        Raised if an integer key is out of bounds
    TypeError
        Raised if the key type is not supported
    ValueError
        Raised if a boolean index does not match this object's size
    """
    if np.isscalar(key) and resolve_scalar_dtype(key) == 'int64':
        orig_key = key
        if key < 0:
            # Interpret negative key as offset from end of array
            key += self.size
        if 0 <= key < self.size:
            rep_msg = generic_msg(
                cmd="segmentedIndex",
                args=" {} {} {} {} {}".format('intIndex', self.objtype,
                                              self.offsets.name,
                                              self.bytes.name, key))
            # Reply is "<type> <value>"; only the value part is parsed
            _, value = rep_msg.split(maxsplit=1)
            return parse_single_value(value)
        raise IndexError("[int] {} is out of bounds with size {}".\
                         format(orig_key,self.size))
    if isinstance(key, slice):
        (start, stop, stride) = key.indices(self.size)
        self.logger.debug('start: {}; stop: {}; stride: {}'.format(
                                                    start, stop, stride))
        rep_msg = generic_msg(
            cmd="segmentedIndex",
            args=" {} {} {} {} {} {} {}".format('sliceIndex', self.objtype,
                                                self.offsets.name,
                                                self.bytes.name, start,
                                                stop, stride))
        # Reply carries the new offsets and values arrays joined by '+'
        offsets, values = rep_msg.split('+')
        return Strings(offsets, values)
    if isinstance(key, pdarray):
        kind, _ = translate_np_dtype(key.dtype)
        if kind not in ("bool", "int"):
            raise TypeError("unsupported pdarray index type {}".format(
                                                                key.dtype))
        if kind == "bool" and self.size != key.size:
            raise ValueError("size mismatch {} {}".format(
                                                    self.size, key.size))
        rep_msg = generic_msg("segmentedIndex",
                              "{} {} {} {} {}".format('pdarrayIndex',
                                                      self.objtype,
                                                      self.offsets.name,
                                                      self.bytes.name,
                                                      key.name))
        offsets, values = rep_msg.split('+')
        return Strings(offsets, values)
    raise TypeError("unsupported pdarray index type {}".format(
                                                key.__class__.__name__))
def _binop(self, other: Categorical, op: str) -> pdarray:
    """
    Executes the requested binop on this Categorical instance and
    returns the results within a pdarray object.

    Parameters
    ----------
    other : Categorical
        the other object is a Categorical object
    op : str
        name of the binary operation to be performed

    Returns
    -------
    pdarray
        encapsulating the results of the requested binop

    Raises
    ------
    ValueError
        Raised if the sizes of this and the other instance don't match
    NotImplementedError
        Raised if the op is not in the self.BinOps set, or if the operand
        combination is not yet supported
    RuntimeError
        Raised if a server-side error is thrown while executing the
        binary operation
    """
    if op not in self.BinOps:
        raise NotImplementedError("Categorical: unsupported operator: {}".\
                                  format(op))
    if np.isscalar(other) and resolve_scalar_dtype(other) == "str":
        # Apply the op to the categories index once, then gather the
        # per-element answers through the codes
        idxresult = self.categories._binop(other, op)
        return idxresult[self.codes]
    if isinstance(other, Categorical):
        # Only array-like operands have a size; checking inside this branch
        # avoids an AttributeError (instead of the NotImplementedError
        # below) when `other` is an unsupported type with no .size
        if self.size != other.size:
            raise ValueError("Categorical {}: size mismatch {} {}".\
                             format(op, self.size, other.size))
        if self.categories.name == other.categories.name:
            # Identical category index, so codes are directly comparable;
            # use _binop for consistency with the other binop paths
            return self.codes._binop(other.codes, op)
        raise NotImplementedError(
            ("Operations between Categoricals " +
             "with different indices not yet implemented"))
    raise NotImplementedError(
        ("Operations between Categorical and " +
         "non-Categorical not yet implemented. " +
         "Consider converting operands to Categorical."))
def test_resolve_scalar_dtype(self):
    '''
    Tests dtypes.resolve_scalar_dtype method

    :return: None
    :raise: AssertionError if 1.. test cases fail
    '''
    # Each pair is (expected dtype name, scalar to resolve)
    cases = [
        ('bool', True),
        ('int64', 1),
        ('float64', float(0.0)),
        ('str', 'test'),
        ('int64', np.int64(1)),
        ("<class 'list'>", [1]),
    ]
    for expected, scalar in cases:
        self.assertEqual(expected, dtypes.resolve_scalar_dtype(scalar))
def __getitem__(self, key):
    """
    Indexes or slices this Categorical.

    An int64 scalar key returns the single category value at that
    position; any other key is delegated to the codes array and the
    result is wrapped back up as a new Categorical.
    """
    if np.isscalar(key) and resolve_scalar_dtype(key) == 'int64':
        return self.categories[self.codes[key]]
    return Categorical.from_codes(self.codes[key], self.categories)
def join_on_eq_with_dt(a1: pdarray, a2: pdarray, t1: pdarray, t2: pdarray,
                       dt: int, pred: str,
                       result_limit: int = 1000) -> Tuple[pdarray, pdarray]:
    """
    Performs an inner-join on equality between two integer arrays where
    the time-window predicate is also true

    Parameters
    ----------
    a1 : pdarray, int64
        pdarray to be joined
    a2 : pdarray, int64
        pdarray to be joined
    t1 : pdarray
        timestamps in millis corresponding to the a1 pdarray
    t2 : pdarray,
        timestamps in millis corresponding to the a2 pdarray
    dt : int
        time delta
    pred : str
        time window predicate
    result_limit : int
        size limit for returned result

    Returns
    -------
    result_array_one : pdarray, int64
        a1 indices where a1 == a2
    result_array_two : pdarray, int64
        a2 indices where a2 == a1

    Raises
    ------
    TypeError
        Raised if a1, a2, t1, or t2 is not a pdarray, or if dt or
        result_limit is not an int
    ValueError
        if a1, a2, t1, or t2 dtype is not int64, pred is not
        'true_dt', 'abs_dt', or 'pos_dt', or result_limit is < 0
    """
    # The server-side join only supports int64 arrays
    for arr, label in ((a1, "a1"), (a2, "a2"), (t1, "t1"), (t2, "t2")):
        if arr.dtype != akint64:
            raise ValueError("{} must be int64 dtype".format(label))
    if pred not in predicates:
        raise ValueError("pred must be one of ", predicates.keys())
    if result_limit < 0:
        raise ValueError('the result_limit must 0 or greater')

    # format numbers for request message
    def _fmt(val):
        # Render a scalar with the canonical format string for its dtype
        return NUMBER_FORMAT_STRINGS[resolve_scalar_dtype(val)].format(val)

    dtstr = _fmt(dt)
    predstr = _fmt(predicates[pred])
    result_limitstr = _fmt(result_limit)
    # groupby on a2
    g2 = GroupBy(a2)
    # pass result into server joinEqWithDT operation
    repMsg = generic_msg("joinEqWithDT {} {} {} {} {} {} {} {} {}".\
                         format(a1.name,
                                cast(pdarray, g2.segments).name,  # type: ignore
                                cast(pdarray, g2.unique_keys).name,  # type: ignore
                                g2.permutation.name,
                                t1.name, t2.name,
                                dtstr, predstr, result_limitstr))
    # create pdarrays for results
    res_i_attr, res_j_attr = cast(str, repMsg).split("+")
    return (create_pdarray(res_i_attr), create_pdarray(res_j_attr))
def where(condition: pdarray, A: Union[numeric_scalars, pdarray],
          B: Union[numeric_scalars, pdarray]) -> pdarray:
    """
    Returns an array with elements chosen from A and B based upon a
    conditioning array. As is the case with numpy.where, the return array
    consists of values from the first array (A) where the conditioning array
    elements are True and from the second array (B) where the conditioning
    array elements are False.

    Parameters
    ----------
    condition : pdarray
        Used to choose values from A or B
    A : Union[numeric_scalars, pdarray]
        Value(s) used when condition is True
    B : Union[numeric_scalars, pdarray]
        Value(s) used when condition is False

    Returns
    -------
    pdarray
        Values chosen from A where the condition is True and B where
        the condition is False

    Raises
    ------
    TypeError
        Raised if the condition object is not a pdarray, if A or B is not
        an int, np.int64, float, np.float64, or pdarray, if pdarray dtypes
        are not supported or do not match, or multiple condition clauses (see
        Notes section) are applied
    ValueError
        Raised if the shapes of the condition, A, and B pdarrays are unequal

    Examples
    --------
    >>> a1 = ak.arange(1,10)
    >>> a2 = ak.ones(9, dtype=np.int64)
    >>> cond = a1 < 5
    >>> ak.where(cond,a1,a2)
    array([1, 2, 3, 4, 1, 1, 1, 1, 1])

    >>> a1 = ak.arange(1,10)
    >>> a2 = ak.ones(9, dtype=np.int64)
    >>> cond = a1 == 5
    >>> ak.where(cond,a1,a2)
    array([1, 1, 1, 1, 5, 1, 1, 1, 1])

    >>> a1 = ak.arange(1,10)
    >>> a2 = 10
    >>> cond = a1 < 5
    >>> ak.where(cond,a1,a2)
    array([1, 2, 3, 4, 10, 10, 10, 10, 10])

    Notes
    -----
    A and B must have the same dtype and only one conditional clause
    is supported e.g., n < 5, n > 1, which is supported in numpy
    is not currently supported in Arkouda
    """
    if (not isSupportedNumber(A) and not isinstance(A,pdarray)) or \
       (not isSupportedNumber(B) and not isinstance(B,pdarray)):
        raise TypeError(
            'both A and B must be an int, np.int64, float, np.float64, or pdarray')
    if isinstance(A, pdarray) and isinstance(B, pdarray):
        # vector-vector case
        rep_msg = generic_msg(cmd="efunc3vv",
                              args="{} {} {} {}".\
                              format("where", condition.name, A.name, B.name))
    elif isinstance(A, pdarray) and np.isscalar(B):
        # For scalars, try to convert it to the array's dtype
        rep_msg = generic_msg(cmd="efunc3vs",
                              args="{} {} {} {} {}".\
                              format("where", condition.name, A.name,
                                     A.dtype.name, A.format_other(B)))
    elif isinstance(B, pdarray) and np.isscalar(A):
        rep_msg = generic_msg(cmd="efunc3sv",
                              args="{} {} {} {} {}".\
                              format("where", condition.name, B.dtype.name,
                                     B.format_other(A), B.name))
    elif np.isscalar(A) and np.isscalar(B):
        # Scalars must share a common dtype (or be cast)
        dtA = resolve_scalar_dtype(A)
        dtB = resolve_scalar_dtype(B)
        # Make sure at least one of the dtypes is supported
        if not (dtA in DTypes or dtB in DTypes):
            raise TypeError(("Not implemented for scalar types {} " +
                             "and {}").format(dtA, dtB))
        if dtA == dtB:  # type: ignore
            # If the dtypes are the same, do not cast
            dt = dtA
        elif dtB in DTypes and np.can_cast(A, dtB):
            # If the dtypes are different, try casting one direction then the other
            A = np.dtype(dtB).type(A)
            dt = dtB
        elif dtA in DTypes and np.can_cast(B, dtA):
            B = np.dtype(dtA).type(B)
            dt = dtA
        else:
            # Cannot safely cast
            raise TypeError(("Cannot cast between scalars {} and {} to " +
                             "supported dtype").format(A, B))
        rep_msg = generic_msg(cmd="efunc3ss",
                              args="{} {} {} {} {} {}".\
                              format("where", condition.name, dt, A, dt, B))
    return create_pdarray(type_cast(str, rep_msg))
def _binop(self, other: Union[Categorical, str_scalars], op: str_scalars) -> pdarray:
    """
    Executes the requested binop on this Categorical instance and
    returns the results within a pdarray object.

    Parameters
    ----------
    other : Union[Categorical,str_scalars]
        the other object is a Categorical object or string scalar
    op : str_scalars
        name of the binary operation to be performed

    Returns
    -------
    pdarray
        encapsulating the results of the requested binop

    Raises
    ------
    ValueError
        Raised if the sizes of this and the other instance don't match
    NotImplementedError
        Raised if the op is not in the self.BinOps set, or if the other
        operand is neither a Categorical nor a string scalar
    RuntimeError
        Raised if a server-side error is thrown while executing the
        binary operation
    """
    if op not in self.BinOps:
        raise NotImplementedError("Categorical: unsupported operator: {}".\
                                  format(op))
    if np.isscalar(other) and resolve_scalar_dtype(other) == "str":
        # Apply the op to the categories index once, then gather the
        # per-element answers through the codes
        idxresult = self.categories._binop(other, op)
        return idxresult[self.codes]
    if isinstance(other, Categorical):
        # Only a Categorical operand has a size; checking inside this branch
        # avoids an AttributeError (instead of the NotImplementedError
        # below) when `other` is an unsupported scalar with no .size
        if self.size != other.size:
            raise ValueError("Categorical {}: size mismatch {} {}".\
                             format(op, self.size, other.size))
        if (self.categories.size == other.categories.size) and \
           (self.categories == other.categories).all():
            # Because categories are identical, codes can be compared directly
            return self.codes._binop(other.codes, op)
        else:
            # Remap both codes to the union of categories
            union = unique(concatenate((self.categories, other.categories),
                                       ordered=False))
            newinds = arange(union.size)
            # Inds of self.categories in unioned categories
            selfnewinds = newinds[in1d(union, self.categories)]
            # Need a permutation and segments to broadcast new codes;
            # cache them on self for reuse
            if self.permutation is None or self.segments is None:
                g = GroupBy(self.codes)
                self.permutation = g.permutation
                self.segments = g.segments
            # Form new codes by broadcasting new indices for unioned categories
            selfnewcodes = broadcast(self.segments, selfnewinds, self.size,
                                     self.permutation)
            # Repeat for other
            othernewinds = newinds[in1d(union, other.categories)]
            if other.permutation is None or other.segments is None:
                g = GroupBy(other.codes)
                other.permutation = g.permutation
                other.segments = g.segments
            othernewcodes = broadcast(other.segments, othernewinds,
                                      other.size, other.permutation)
            # selfnewcodes and othernewcodes now refer to same unioned
            # categories and can be compared directly
            return selfnewcodes._binop(othernewcodes, op)
    else:
        raise NotImplementedError(
            ("Operations between Categorical and " +
             "non-Categorical not yet implemented. " +
             "Consider converting operands to Categorical."))