예제 #1
0
def setxor1d(pda1: pdarray,
             pda2: pdarray,
             assume_unique: bool = False) -> pdarray:
    """
    Compute the symmetric difference (set exclusive-or) of two arrays.

    The result holds the values that occur in exactly one of the two
    inputs, in sorted order and without duplicates.

    Parameters
    ----------
    pda1 : pdarray
        First input array.
    pda2 : pdarray
        Second input array.
    assume_unique : bool
        When True, both inputs are taken to be duplicate-free already and
        the de-duplication step is skipped.  Default is False.

    Returns
    -------
    pdarray
        Sorted 1D array of the values found in only one of the inputs.

    Raises
    ------
    TypeError
        Documented as raised when pda1 or pda2 is not a pdarray.  No check
        is performed inside this function body -- presumably enforced by a
        decorator or by the caller; TODO confirm.
    RuntimeError
        Documented as raised when the dtype of either pdarray is not
        supported -- presumably reported by the server-side calls; TODO
        confirm.

    Notes
    -----
    ak.setxor1d is documented as not supported for bool or float64 pdarrays.

    If either input has size 0, the other input is returned as-is (the very
    same object); it is not de-duplicated or sorted in that case.

    Examples
    --------
    >>> a = ak.array([1, 2, 3, 2, 4])
    >>> b = ak.array([2, 3, 5, 7, 5])
    >>> ak.setxor1d(a,b)
    array([1, 4, 5, 7])
    """
    # An empty operand contributes nothing: hand back the other one untouched.
    for operand, counterpart in ((pda1, pda2), (pda2, pda1)):
        if operand.size == 0:
            return counterpart

    # Two integer arrays are delegated to the dedicated "setxor1d" command.
    if pda1.dtype == int and pda2.dtype == int:
        msg_args = "{} {} {}".format(pda1.name, pda2.name, assume_unique)
        reply = generic_msg(cmd="setxor1d", args=msg_args)
        return create_pdarray(cast(str, reply))

    # Generic path: de-duplicate each side (unless told not to), merge, sort,
    # and keep only the values that differ from both of their neighbours.
    left, right = pda1, pda2
    if not assume_unique:
        left = cast(pdarray, unique(pda1))
        right = cast(pdarray, unique(pda2))
    merged = concatenate((left, right), ordered=False)
    merged = merged[argsort(merged)]
    # edge[i] is True where merged[i] starts a new run of equal values; the
    # True sentinels at both ends make the first and last runs comparable.
    edge = concatenate((
        array([True]),
        merged[1:] != merged[:-1],
        array([True]),
    ))
    # A value is kept when its run has length one, i.e. a new run starts both
    # at its position and immediately after it.
    return merged[edge[1:] & edge[:-1]]
예제 #2
0
def setxor1d(pda1, pda2, assume_unique=False):
    """
    Compute the symmetric difference (set exclusive-or) of two arrays.

    The result holds the values that occur in exactly one of the two
    inputs, in sorted order and without duplicates.

    Parameters
    ----------
    pda1 : pdarray
        First input array.
    pda2 : pdarray
        Second input array.
    assume_unique : bool
        When True, both inputs are taken to be duplicate-free already and
        the de-duplication step is skipped.  Default is False.

    Returns
    -------
    pdarray
        Sorted 1D array of the values found in only one of the inputs.

    Raises
    ------
    TypeError
        Raised if pda1 or pda2 is not a pdarray.

    Notes
    -----
    If either input has size 0, the other input is returned as-is (the very
    same object); it is not de-duplicated or sorted in that case.

    Examples
    --------
    >>> a = ak.array([1, 2, 3, 2, 4])
    >>> b = ak.array([2, 3, 5, 7, 5])
    >>> ak.setxor1d(a,b)
    array([1, 4, 5, 7])
    """
    # Reject anything that is not a pair of pdarrays before touching them.
    if not (isinstance(pda1, pdarray) and isinstance(pda2, pdarray)):
        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))

    # An empty operand contributes nothing: hand back the other one untouched.
    if pda1.size == 0:
        return pda2
    if pda2.size == 0:
        return pda1

    # Two integer arrays are delegated to the dedicated "setxor1d" message.
    if pda1.dtype == int and pda2.dtype == int:
        request = "setxor1d {} {} {}".format(pda1.name, pda2.name, assume_unique)
        return create_pdarray(generic_msg(request))

    # Generic path: de-duplicate each side (unless told not to), merge, sort,
    # and keep only the values that differ from both of their neighbours.
    if assume_unique:
        left, right = pda1, pda2
    else:
        left, right = unique(pda1), unique(pda2)
    combined = concatenate((left, right))
    combined = combined[argsort(combined)]
    # boundary[i] is True where combined[i] starts a new run of equal values;
    # the True sentinels at both ends cover the first and last runs.
    boundary = concatenate((
        array([True]),
        combined[1:] != combined[:-1],
        array([True]),
    ))
    # A value survives only when its run has length one.
    return combined[boundary[1:] & boundary[:-1]]
예제 #3
0
    def broadcast(self, values : pdarray) -> pdarray:
        """
        Replicate one value per group across that group's whole segment.

        Parameters
        ----------
        values : pdarray
            One value for each segment, to be copied to every element of
            that segment.

        Returns
        -------
        pdarray
            Array of length ``self.size`` holding the broadcast values.

        Raises
        ------
        TypeError
            Raised if values is not a pdarray object
        ValueError
            Raised if the number of values differs from the number of
            segments

        Notes
        -----
        This function is a sparse analog of ``np.broadcast``. If a
        GroupBy object represents a sparse matrix (tensor), then
        this function takes a (dense) column vector and replicates
        each value to the non-zero elements in the corresponding row.

        The returned array is in permuted (grouped) order. To get
        back to the order of the array on which GroupBy was called,
        the user must invert the permutation (see below).

        Examples
        --------
        >>> a = ak.array([0, 1, 0, 1, 0])
        >>> values = ak.array([3, 5])
        >>> g = ak.GroupBy(a)
        # Result is in grouped order
        >>> g.broadcast(values)
        array([3, 3, 3, 5, 5])

        >>> b = ak.zeros_like(a)
        # Result is in original order
        >>> b[g.permutation] = g.broadcast(values)
        >>> b
        array([3, 5, 3, 5, 3])
        """

        if not isinstance(values, pdarray):
            raise TypeError("Vals must be pdarray")
        if values.size != self.segments.size:
            raise ValueError("Must have one value per segment")

        filled = zeros(self.size, values.dtype)
        if values.size != 0:
            # Store the first value followed by the successive differences at
            # the positions given by self.segments (presumably each group's
            # start offset -- TODO confirm); a running sum then carries every
            # value forward until the next stored step.
            steps = concatenate((array([values[0]]), values[1:] - values[:-1]))
            filled[self.segments] = steps
            filled = cumsum(filled)
        return filled
예제 #4
0
    def broadcast(self, values : pdarray) -> pdarray:
        """
        Fill each group's segment with a constant value.

        Parameters
        ----------
        values : pdarray
            The values to put in each group's segment. Boolean arrays are
            accepted; they are converted with ``1*values`` first, so the
            result holds 0/1 rather than booleans.

        Returns
        -------
        pdarray
            The broadcast values

        Raises
        ------
        TypeError
            Raised if values is not a pdarray object
        ValueError
            Raised if the values array does not have one
            value per segment

        Notes
        -----
        This function is a sparse analog of ``np.broadcast``. If a
        GroupBy object represents a sparse matrix (tensor), then
        this function takes a (dense) column vector and replicates
        each value to the non-zero elements in the corresponding row.

        The returned array is in permuted (grouped) order. To get
        back to the order of the array on which GroupBy was called,
        the user must invert the permutation (see below).

        Examples
        --------
        >>> a = ak.array([0, 1, 0, 1, 0])
        >>> values = ak.array([3, 5])
        >>> g = ak.GroupBy(a)
        # Result is in grouped order
        >>> g.broadcast(values)
        array([3, 3, 3, 5, 5])

        >>> b = ak.zeros_like(a)
        # Result is in original order
        >>> b[g.permutation] = g.broadcast(values)
        >>> b
        array([3, 5, 3, 5, 3])

        >>> a = ak.randint(1,5,10)
        >>> a
        array([3, 1, 4, 4, 4, 1, 3, 3, 2, 2])
        >>> g = ak.GroupBy(a)
        >>> keys,counts = g.count()
        >>> g.broadcast(counts > 2)
        array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
        >>> g.broadcast(counts == 3)
        array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
        >>> g.broadcast(counts < 4)
        array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
        """
        # Enforce the documented contract up front; without this a wrong
        # argument type surfaces as an AttributeError on `.dtype` below.
        if not isinstance(values, pdarray):
            raise TypeError("Vals must be pdarray")
        # If values is a boolean array, convert it to an int64 array, which
        # is needed for now because Arkouda does not support broadcasting
        # of boolean arrays.  The builtin `bool` is compared against instead
        # of the `np.bool` alias, which NumPy deprecated in 1.20 and removed
        # in 1.24.
        if values.dtype == bool:
            values = 1*values
        if values.size != self.segments.size:
            raise ValueError("Must have one value per segment")
        temp = zeros(self.size, values.dtype)
        if values.size == 0:
            return temp
        # Store the first value followed by the successive differences at the
        # positions given by self.segments; the cumulative sum then carries
        # each value forward until the next stored step.
        diffs = concatenate((array([values[0]]), values[1:] - values[:-1]))
        temp[self.segments] = diffs
        return cumsum(temp)