Example 1
0
 def binop(self, other, op):
     # Dispatch a binary operation: a string scalar is applied per-category,
     # another Categorical with the same index is compared via codes, and
     # everything else is unsupported.
     if op not in self.BinOps:
         raise NotImplementedError("Categorical: unsupported operator: {}".format(op))
     if np.isscalar(other) and resolve_scalar_dtype(other) == "str":
         # Apply the op to the (small) categories index, then fan the
         # per-category results back out to the full array via the codes.
         per_category = self.categories.binop(other, op)
         return per_category[self.codes]
     if self.size != other.size:
         raise ValueError("Categorical {}: size mismatch {} {}".format(op, self.size, other.size))
     if not isinstance(other, Categorical):
         raise NotImplementedError("Operations between Categorical and non-Categorical not yet implemented. Consider converting operands to Categorical.")
     if self.categories.name != other.categories.name:
         raise NotImplementedError("Operations between Categoricals with different indices not yet implemented")
     # Identical category indices: the codes map one-to-one, so the op can
     # be performed directly on the code arrays.
     return self.codes.binop(other.codes, op)
Example 2
0
    def _binop(self, other: Union[Strings, np.str_, str], op: str) -> pdarray:
        """
        Executes the requested binop on this Strings instance and the
        other operand — either another Strings object or a string
        scalar — and returns the results within a pdarray object.

        Parameters
        ----------
        other : Strings, np.str_, or str
            the other operand: a Strings object of matching size, or a
            string scalar applied element-wise
        op : str
            name of the binary operation to be performed

        Returns
        -------
        pdarray
            encapsulating the results of the requested binop

        Raises
        ------
        ValueError
            Raised if (1) the op is not in the self.BinOps set, or (2) if the
            sizes of this and the other instance don't match, or (3) the other
            object is neither a Strings object nor a string scalar
        RuntimeError
            Raised if a server-side error is thrown while executing the
            binary operation
        """
        if op not in self.BinOps:
            raise ValueError("Strings: unsupported operator: {}".format(op))
        if isinstance(other, Strings):
            # vector-vector case: element-wise op requires equal sizes
            if self.size != other.size:
                raise ValueError("Strings: size mismatch {} {}".\
                                 format(self.size, other.size))
            cmd = "segmentedBinopvv"
            args = "{} {} {} {} {} {} {}".format(
                op, self.objtype, self.offsets.name, self.bytes.name,
                other.objtype, other.offsets.name, other.bytes.name)
        elif resolve_scalar_dtype(other) == 'str':
            # vector-scalar case: the scalar is JSON-encoded for transport
            # to the server
            cmd = "segmentedBinopvs"
            args = "{} {} {} {} {} {}".format(op, self.objtype,
                                              self.offsets.name,
                                              self.bytes.name, self.objtype,
                                              json.dumps([other]))
        else:
            raise ValueError("Strings: {} not supported between Strings and {}"\
                             .format(op, other.__class__.__name__))
        return create_pdarray(generic_msg(cmd=cmd, args=args))
Example 3
0
 def __getitem__(self, key):
     # Dispatch on the index type — int64 scalar, slice, or pdarray of
     # int/bool indices. Every path delegates to the server-side
     # "segmentedIndex" command; unsupported key types raise TypeError.
     if np.isscalar(key) and resolve_scalar_dtype(key) == 'int64':
         orig_key = key
         if key < 0:
             # Interpret negative key as offset from end of array
             key += self.size
         if not (0 <= key < self.size):
             raise IndexError("[int] {} is out of bounds with size {}".\
                              format(orig_key,self.size))
         repMsg = generic_msg(
             cmd="segmentedIndex",
             args=" {} {} {} {} {}".format('intIndex', self.objtype,
                                           self.offsets.name,
                                           self.bytes.name, key))
         # Reply is "<dtype> <value>"; parse out the single value.
         _, value = repMsg.split(maxsplit=1)
         return parse_single_value(value)
     if isinstance(key, slice):
         start, stop, stride = key.indices(self.size)
         self.logger.debug('start: {}; stop: {}; stride: {}'.format(
             start, stop, stride))
         repMsg = generic_msg(
             cmd="segmentedIndex",
             args=" {} {} {} {} {} {} {}".format('sliceIndex', self.objtype,
                                                 self.offsets.name,
                                                 self.bytes.name, start,
                                                 stop, stride))
         # Reply carries the offsets and values arrays joined by '+'.
         offsets, values = repMsg.split('+')
         return Strings(offsets, values)
     if isinstance(key, pdarray):
         kind, _ = translate_np_dtype(key.dtype)
         if kind not in ("bool", "int"):
             raise TypeError("unsupported pdarray index type {}".format(
                 key.dtype))
         # A boolean mask must cover every element exactly once.
         if kind == "bool" and self.size != key.size:
             raise ValueError("size mismatch {} {}".format(
                 self.size, key.size))
         repMsg = generic_msg(
             "segmentedIndex",
             "{} {} {} {} {}".format('pdarrayIndex', self.objtype,
                                     self.offsets.name, self.bytes.name,
                                     key.name))
         offsets, values = repMsg.split('+')
         return Strings(offsets, values)
     raise TypeError("unsupported pdarray index type {}".format(
         key.__class__.__name__))
Example 4
0
    def _binop(self, other: Categorical, op: str) -> pdarray:
        """
        Executes the requested binop on this Categorical instance and returns
        the results within a pdarray object.

        Parameters
        ----------
        other : Categorical
            the other operand: a Categorical object or a string scalar
        op : str
            name of the binary operation to be performed

        Returns
        -------
        pdarray
            encapsulating the results of the requested binop

        Raises
        ------
        NotImplementedError
            Raised if (1) the op is not in the self.BinOps set, or (2) the
            other operand is neither a Categorical nor a string scalar, or
            (3) the two Categoricals are built on different indices
        ValueError
            Raised if the sizes of this and the other instance don't match
        RuntimeError
            Raised if a server-side error is thrown while executing the
            binary operation
        """
        if op not in self.BinOps:
            raise NotImplementedError("Categorical: unsupported operator: {}".\
                                      format(op))
        if np.isscalar(other) and resolve_scalar_dtype(other) == "str":
            # Apply the op to the (small) categories index, then fan the
            # per-category results back out to the full array via codes.
            idxresult = self.categories._binop(other, op)
            return idxresult[self.codes]
        if self.size != other.size:
            raise ValueError("Categorical {}: size mismatch {} {}".\
                             format(op, self.size, other.size))
        if isinstance(other, Categorical):
            if self.categories.name == other.categories.name:
                # Same underlying index: codes are directly comparable.
                # Use _binop for consistency with self.categories._binop
                # above (the public `binop` name is the older spelling).
                return self.codes._binop(other.codes, op)
            else:
                raise NotImplementedError(
                    ("Operations between Categoricals " +
                     "with different indices not yet implemented"))
        else:
            raise NotImplementedError(
                ("Operations between Categorical and " +
                 "non-Categorical not yet implemented. " +
                 "Consider converting operands to Categorical."))
Example 5
0
 def test_resolve_scalar_dtype(self):
     '''
     Tests dtypes.resolve_scalar_dtype method

     :return: None
     :raise: AssertionError if 1.. test cases fail
     '''
     # Table of (expected dtype name, scalar value) pairs covering the
     # supported scalar kinds plus one unsupported type (list).
     cases = [
         ('bool', True),
         ('int64', 1),
         ('float64', float(0.0)),
         ('str', 'test'),
         ('int64', np.int64(1)),
         ("<class 'list'>", [1]),
     ]
     for expected, value in cases:
         self.assertEqual(expected, dtypes.resolve_scalar_dtype(value))
Example 6
0
 def __getitem__(self, key):
     # An int64 scalar selects one element: look up its code, then the
     # corresponding category value. Any other key (slice, pdarray mask,
     # etc.) is delegated to the codes' own indexing and rewrapped as a
     # new Categorical over the same categories.
     if not (np.isscalar(key) and resolve_scalar_dtype(key) == 'int64'):
         return Categorical.from_codes(self.codes[key], self.categories)
     return self.categories[self.codes[key]]
Example 7
0
def join_on_eq_with_dt(a1: pdarray,
                       a2: pdarray,
                       t1: pdarray,
                       t2: pdarray,
                       dt: int,
                       pred: str,
                       result_limit: int = 1000) -> Tuple[pdarray, pdarray]:
    """
    Performs an inner-join on equality between two integer arrays where
    the time-window predicate is also true

    Parameters
    ----------
    a1 : pdarray, int64
        pdarray to be joined
    a2 : pdarray, int64
        pdarray to be joined
    t1 : pdarray
        timestamps in millis corresponding to the a1 pdarray
    t2 : pdarray
        timestamps in millis corresponding to the a2 pdarray
    dt : int
        time delta
    pred : str
        time window predicate
    result_limit : int
        size limit for returned result

    Returns
    -------
    result_array_one : pdarray, int64
        a1 indices where a1 == a2
    result_array_two : pdarray, int64
        a2 indices where a2 == a1

    Raises
    ------
    TypeError
        Raised if a1, a2, t1, or t2 is not a pdarray, or if dt or
        result_limit is not an int
    ValueError
        if a1, a2, t1, or t2 dtype is not int64, pred is not
        'true_dt', 'abs_dt', or 'pos_dt', or result_limit is < 0
    """
    # All four arrays must be int64 for the server-side join.
    if a1.dtype != akint64:
        raise ValueError("a1 must be int64 dtype")

    if a2.dtype != akint64:
        raise ValueError("a2 must be int64 dtype")

    if t1.dtype != akint64:
        raise ValueError("t1 must be int64 dtype")

    if t2.dtype != akint64:
        raise ValueError("t2 must be int64 dtype")

    if pred not in predicates:
        # Format the valid names into the message; passing them as a second
        # exception argument would render the message as a tuple.
        raise ValueError("pred must be one of {}".format(list(predicates.keys())))

    if result_limit < 0:
        raise ValueError('the result_limit must be 0 or greater')

    # format numbers for request message
    dttype = resolve_scalar_dtype(dt)
    dtstr = NUMBER_FORMAT_STRINGS[dttype].format(dt)
    predtype = resolve_scalar_dtype(predicates[pred])
    predstr = NUMBER_FORMAT_STRINGS[predtype].format(predicates[pred])
    result_limittype = resolve_scalar_dtype(result_limit)
    result_limitstr = NUMBER_FORMAT_STRINGS[result_limittype].\
                                 format(result_limit)
    # groupby on a2 so the server can probe a1 against grouped a2 values
    g2 = GroupBy(a2)
    # pass result into server joinEqWithDT operation
    repMsg = generic_msg("joinEqWithDT {} {} {} {} {} {} {} {} {}".\
                         format(a1.name,
                                cast(pdarray, g2.segments).name,  # type: ignore
                                cast(pdarray, g2.unique_keys).name,  # type: ignore
                                g2.permutation.name,
                                t1.name,
                                t2.name,
                                dtstr, predstr, result_limitstr))
    # create pdarrays for results; reply carries the two attribute strings
    # joined by '+'
    resIAttr, resJAttr = cast(str, repMsg).split("+")
    resI = create_pdarray(resIAttr)
    resJ = create_pdarray(resJAttr)
    return (resI, resJ)
Example 8
0
def where(condition: pdarray, A: Union[numeric_scalars, pdarray],
          B: Union[numeric_scalars, pdarray]) -> pdarray:
    """
    Returns an array with elements chosen from A and B based upon a
    conditioning array. As is the case with numpy.where, the return array
    consists of values from the first array (A) where the conditioning array
    elements are True and from the second array (B) where the conditioning
    array elements are False.

    Parameters
    ----------
    condition : pdarray
        Used to choose values from A or B
    A : Union[numeric_scalars, pdarray]
        Value(s) used when condition is True
    B : Union[numeric_scalars, pdarray]
        Value(s) used when condition is False

    Returns
    -------
    pdarray
        Values chosen from A where the condition is True and B where
        the condition is False

    Raises
    ------
    TypeError
        Raised if the condition object is not a pdarray, if A or B is not
        an int, np.int64, float, np.float64, or pdarray, if pdarray dtypes
        are not supported or do not match, or multiple condition clauses (see
        Notes section) are applied
    ValueError
        Raised if the shapes of the condition, A, and B pdarrays are unequal

    Examples
    --------
    >>> a1 = ak.arange(1,10)
    >>> a2 = ak.ones(9, dtype=np.int64)
    >>> cond = a1 < 5
    >>> ak.where(cond,a1,a2)
    array([1, 2, 3, 4, 1, 1, 1, 1, 1])

    >>> a1 = ak.arange(1,10)
    >>> a2 = ak.ones(9, dtype=np.int64)
    >>> cond = a1 == 5
    >>> ak.where(cond,a1,a2)
    array([1, 1, 1, 1, 5, 1, 1, 1, 1])

    >>> a1 = ak.arange(1,10)
    >>> a2 = 10
    >>> cond = a1 < 5
    >>> ak.where(cond,a1,a2)
    array([1, 2, 3, 4, 10, 10, 10, 10, 10])

    Notes
    -----
    A and B must have the same dtype and only one conditional clause
    is supported e.g., n < 5, n > 1, which is supported in numpy
    is not currently supported in Arkouda
    """
    # Enforce the documented contract: condition must be a pdarray. Without
    # this check a bad condition would fail with an obscure AttributeError
    # when condition.name is read below.
    if not isinstance(condition, pdarray):
        raise TypeError('condition must be a pdarray')
    if (not isSupportedNumber(A) and not isinstance(A,pdarray)) or \
                                      (not isSupportedNumber(B) and not isinstance(B,pdarray)):
        raise TypeError(
            'both A and B must be an int, np.int64, float, np.float64, or pdarray'
        )
    if isinstance(A, pdarray) and isinstance(B, pdarray):
        repMsg = generic_msg(cmd="efunc3vv", args="{} {} {} {}".\
                             format("where",
                                    condition.name,
                                    A.name,
                                    B.name))
    # For scalars, try to convert it to the array's dtype
    elif isinstance(A, pdarray) and np.isscalar(B):
        repMsg = generic_msg(cmd="efunc3vs", args="{} {} {} {} {}".\
                             format("where",
                                    condition.name,
                                    A.name,
                                    A.dtype.name,
                                    A.format_other(B)))
    elif isinstance(B, pdarray) and np.isscalar(A):
        repMsg = generic_msg(cmd="efunc3sv", args="{} {} {} {} {}".\
                             format("where",
                                    condition.name,
                                    B.dtype.name,
                                    B.format_other(A),
                                    B.name))
    elif np.isscalar(A) and np.isscalar(B):
        # Scalars must share a common dtype (or be cast)
        dtA = resolve_scalar_dtype(A)
        dtB = resolve_scalar_dtype(B)
        # Make sure at least one of the dtypes is supported
        if not (dtA in DTypes or dtB in DTypes):
            raise TypeError(
                ("Not implemented for scalar types {} " + "and {}").format(
                    dtA, dtB))
        # If the dtypes are the same, do not cast
        if dtA == dtB:  # type: ignore
            dt = dtA
        # If the dtypes are different, try casting one direction then the other
        elif dtB in DTypes and np.can_cast(A, dtB):
            A = np.dtype(dtB).type(A)
            dt = dtB
        elif dtA in DTypes and np.can_cast(B, dtA):
            B = np.dtype(dtA).type(B)
            dt = dtA
        # Cannot safely cast
        else:
            raise TypeError(("Cannot cast between scalars {} and {} to " +
                             "supported dtype").format(A, B))
        repMsg = generic_msg(cmd="efunc3ss", args="{} {} {} {} {} {}".\
                             format("where",
                                    condition.name,
                                    dt,
                                    A,
                                    dt,
                                    B))
    return create_pdarray(type_cast(str, repMsg))
Example 9
0
    def _binop(self, other: Union[Categorical, str_scalars],
               op: str_scalars) -> pdarray:
        """
        Executes the requested binop on this Categorical instance and returns
        the results within a pdarray object.

        Parameters
        ----------
        other : Union[Categorical,str_scalars]
            the other operand: a Categorical object or a string scalar
        op : str_scalars
            name of the binary operation to be performed

        Returns
        -------
        pdarray
            encapsulating the results of the requested binop

        Raises
        ------
        NotImplementedError
            Raised if (1) the op is not in the self.BinOps set, or (2) the
            other operand is neither a Categorical nor a string scalar
        ValueError
            Raised if the sizes of this and the other instance don't match
        RuntimeError
            Raised if a server-side error is thrown while executing the
            binary operation
        """
        if op not in self.BinOps:
            raise NotImplementedError("Categorical: unsupported operator: {}".\
                                      format(op))
        if np.isscalar(other) and resolve_scalar_dtype(other) == "str":
            # String scalar: apply the op to the (small) categories index,
            # then fan the per-category results out to the full array via
            # the codes.
            idxresult = self.categories._binop(other, op)
            return idxresult[self.codes]
        # NOTE(review): this size check runs before the isinstance check
        # below, so a non-Categorical, non-string `other` without a .size
        # attribute raises AttributeError rather than the documented
        # NotImplementedError — confirm whether that is intended.
        if self.size != cast(Categorical, other).size:
            raise ValueError("Categorical {}: size mismatch {} {}".\
                             format(op, self.size, cast(Categorical,other).size))
        if isinstance(other, Categorical):
            if (self.categories.size
                    == other.categories.size) and (self.categories
                                                   == other.categories).all():
                # Because categories are identical, codes can be compared directly
                return self.codes._binop(other.codes, op)
            else:
                # Remap both codes to the union of categories
                union = unique(
                    concatenate((self.categories, other.categories),
                                ordered=False))
                newinds = arange(union.size)
                # Inds of self.categories in unioned categories
                selfnewinds = newinds[in1d(union, self.categories)]
                # Need a permutation and segments to broadcast new codes
                if self.permutation is None or self.segments is None:
                    # Cache the grouping on self for reuse by later calls
                    g = GroupBy(self.codes)
                    self.permutation = g.permutation
                    self.segments = g.segments
                # Form new codes by broadcasting new indices for unioned categories
                selfnewcodes = broadcast(self.segments, selfnewinds, self.size,
                                         self.permutation)
                # Repeat for other
                othernewinds = newinds[in1d(union, other.categories)]
                if other.permutation is None or other.segments is None:
                    g = GroupBy(other.codes)
                    other.permutation = g.permutation
                    other.segments = g.segments
                othernewcodes = broadcast(other.segments, othernewinds,
                                          other.size, other.permutation)
                # selfnewcodes and othernewcodes now refer to same unioned categories
                # and can be compared directly
                return selfnewcodes._binop(othernewcodes, op)
        else:
            raise NotImplementedError(
                ("Operations between Categorical and " +
                 "non-Categorical not yet implemented. " +
                 "Consider converting operands to Categorical."))