def setxor1d(pda1: pdarray, pda2: pdarray, assume_unique: bool = False) -> pdarray:
    """
    Find the set exclusive-or (symmetric difference) of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    pda1 : pdarray
        Input array.
    pda2 : pdarray
        Input array.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.

    Returns
    -------
    pdarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Raises
    ------
    TypeError
        Raised if either pda1 or pda2 is not a pdarray
    RuntimeError
        Raised if the dtype of either pdarray is not supported

    Notes
    -----
    ak.setxor1d is not supported for bool or float64 pdarrays

    When exactly one input is empty, the result is the other input, sorted
    and (unless ``assume_unique`` is True) de-duplicated, so the
    "sorted, unique" contract also holds for that case.

    Examples
    --------
    >>> a = ak.array([1, 2, 3, 2, 4])
    >>> b = ak.array([2, 3, 5, 7, 5])
    >>> ak.setxor1d(a,b)
    array([1, 4, 5, 7])
    """
    if not (isinstance(pda1, pdarray) and isinstance(pda2, pdarray)):
        raise TypeError(
            "pda1 and pda2 must both be pdarray, got {} and {}".format(
                type(pda1).__name__, type(pda2).__name__
            )
        )

    if pda1.size == 0 or pda2.size == 0:
        # The symmetric difference with an empty set is the other set, but
        # the result must still be sorted and free of duplicates.
        remaining = pda2 if pda1.size == 0 else pda1
        if remaining.size == 0:
            return remaining  # both inputs empty: nothing to sort
        if not assume_unique:
            remaining = cast(pdarray, unique(remaining))
        return remaining[argsort(remaining)]

    if pda1.dtype == int and pda2.dtype == int:
        # Integer inputs are handed to the server-side implementation.
        repMsg = generic_msg(
            cmd="setxor1d",
            args="{} {} {}".format(pda1.name, pda2.name, assume_unique),
        )
        return create_pdarray(cast(str, repMsg))

    if not assume_unique:
        pda1 = cast(pdarray, unique(pda1))
        pda2 = cast(pdarray, unique(pda2))
    aux = concatenate((pda1, pda2), ordered=False)
    aux_sort_indices = argsort(aux)
    aux = aux[aux_sort_indices]
    # Each input is unique at this point, so a value present in both inputs
    # appears exactly twice and the two copies are adjacent after sorting.
    # flag marks the boundaries where neighbouring values differ; an element
    # is kept only when it differs from both its left and right neighbour.
    flag = concatenate((array([True]), aux[1:] != aux[:-1], array([True])))
    return aux[flag[1:] & flag[:-1]]
def setxor1d(pda1, pda2, assume_unique=False):
    """
    Find the set exclusive-or (symmetric difference) of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    pda1 : pdarray
        Input array.
    pda2 : pdarray
        Input array.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.

    Returns
    -------
    pdarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Raises
    ------
    TypeError
        Raised if either pda1 or pda2 is not a pdarray

    Notes
    -----
    When exactly one input is empty, the result is the other input, sorted
    and (unless ``assume_unique`` is True) de-duplicated, so the
    "sorted, unique" contract also holds for that case.

    Examples
    --------
    >>> a = ak.array([1, 2, 3, 2, 4])
    >>> b = ak.array([2, 3, 5, 7, 5])
    >>> ak.setxor1d(a,b)
    array([1, 4, 5, 7])
    """
    if not (isinstance(pda1, pdarray) and isinstance(pda2, pdarray)):
        raise TypeError("must be pdarray {} or {}".format(pda1,pda2))

    if pda1.size == 0 or pda2.size == 0:
        # The symmetric difference with an empty set is the other set, but
        # the result must still be sorted and free of duplicates.
        remaining = pda2 if pda1.size == 0 else pda1
        if remaining.size == 0:
            return remaining  # both inputs empty: nothing to sort
        if not assume_unique:
            remaining = unique(remaining)
        return remaining[argsort(remaining)]

    if pda1.dtype == int and pda2.dtype == int:
        # Integer inputs are handed to the server-side implementation.
        repMsg = generic_msg("setxor1d {} {} {}".format(pda1.name, pda2.name, assume_unique))
        return create_pdarray(repMsg)

    if not assume_unique:
        pda1 = unique(pda1)
        pda2 = unique(pda2)
    aux = concatenate((pda1, pda2))
    aux_sort_indices = argsort(aux)
    aux = aux[aux_sort_indices]
    # Each input is unique at this point, so a value present in both inputs
    # appears exactly twice and the two copies are adjacent after sorting.
    # flag marks the boundaries where neighbouring values differ; an element
    # is kept only when it differs from both its left and right neighbour.
    flag = concatenate((array([True]), aux[1:] != aux[:-1], array([True])))
    return aux[flag[1:] & flag[:-1]]
def broadcast(self, values: pdarray) -> pdarray:
    """
    Fill each group's segment with a constant value.

    Parameters
    ----------
    values : pdarray
        The values to put in each group's segment

    Returns
    -------
    pdarray
        The broadcast values

    Raises
    ------
    TypeError
        Raised if values is not a pdarray object
    ValueError
        Raised if the values array does not have one value per segment

    Notes
    -----
    This function is a sparse analog of ``np.broadcast``. If a
    GroupBy object represents a sparse matrix (tensor), then
    this function takes a (dense) column vector and replicates
    each value to the non-zero elements in the corresponding row.

    The returned array is in permuted (grouped) order. To get
    back to the order of the array on which GroupBy was called,
    the user must invert the permutation (see below).

    Boolean ``values`` are first promoted with ``1 * values`` because the
    difference step below is not available for boolean arrays.

    The result is rebuilt as a cumulative sum of the differences between
    consecutive values, so floating-point results are subject to rounding
    and may not reproduce the inputs bit-for-bit.

    Examples
    --------
    >>> a = ak.array([0, 1, 0, 1, 0])
    >>> values = ak.array([3, 5])
    >>> g = ak.GroupBy(a)
    # Result is in grouped order
    >>> g.broadcast(values)
    array([3, 3, 3, 5, 5])

    >>> b = ak.zeros_like(a)
    # Result is in original order
    >>> b[g.permutation] = g.broadcast(values)
    >>> b
    array([3, 5, 3, 5, 3])
    """
    if not isinstance(values, pdarray):
        raise TypeError("Vals must be pdarray")
    if values.size != self.segments.size:
        raise ValueError("Must have one value per segment")
    if values.dtype == bool:
        # Promote booleans so that the subtraction below is defined.
        values = 1 * values
    temp = zeros(self.size, values.dtype)
    if values.size == 0:
        return temp
    # Place, at the first position of every segment, the step from the
    # previous group's value to this group's value; the running sum then
    # holds each group's value across its whole segment.
    diffs = concatenate((array([values[0]]), values[1:] - values[:-1]))
    temp[self.segments] = diffs
    return cumsum(temp)
def broadcast(self, values: pdarray) -> pdarray:
    """
    Fill each group's segment with a constant value.

    Parameters
    ----------
    values : pdarray
        The values to put in each group's segment

    Returns
    -------
    pdarray
        The broadcast values

    Raises
    ------
    TypeError
        Raised if values is not a pdarray object
    ValueError
        Raised if the values array does not have one value per segment

    Notes
    -----
    This function is a sparse analog of ``np.broadcast``. If a
    GroupBy object represents a sparse matrix (tensor), then
    this function takes a (dense) column vector and replicates
    each value to the non-zero elements in the corresponding row.

    The returned array is in permuted (grouped) order. To get
    back to the order of the array on which GroupBy was called,
    the user must invert the permutation (see below).

    Boolean ``values`` are first promoted with ``1 * values``, so the
    result holds 0/1 rather than False/True (see the last examples).

    The result is rebuilt as a cumulative sum of the differences between
    consecutive values, so floating-point results are subject to rounding
    and may not reproduce the inputs bit-for-bit.

    Examples
    --------
    >>> a = ak.array([0, 1, 0, 1, 0])
    >>> values = ak.array([3, 5])
    >>> g = ak.GroupBy(a)
    # Result is in grouped order
    >>> g.broadcast(values)
    array([3, 3, 3, 5, 5])

    >>> b = ak.zeros_like(a)
    # Result is in original order
    >>> b[g.permutation] = g.broadcast(values)
    >>> b
    array([3, 5, 3, 5, 3])

    >>> a = ak.randint(1,5,10)
    >>> a
    array([3, 1, 4, 4, 4, 1, 3, 3, 2, 2])
    >>> g = ak.GroupBy(a)
    >>> keys,counts = g.count()
    >>> g.broadcast(counts > 2)
    array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> g.broadcast(counts == 3)
    array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    >>> g.broadcast(counts < 4)
    array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    """
    if not isinstance(values, pdarray):
        raise TypeError("Vals must be pdarray")
    # If values is a boolean array, convert it to an integer array; this is
    # needed for now because Arkouda does not support broadcasting of
    # boolean arrays.  The builtin ``bool`` is used for the comparison
    # because the ``np.bool`` alias was removed in NumPy 1.24.
    if values.dtype == bool:
        values = 1 * values
    if values.size != self.segments.size:
        raise ValueError("Must have one value per segment")
    temp = zeros(self.size, values.dtype)
    if values.size == 0:
        return temp
    # Place, at the first position of every segment, the step from the
    # previous group's value to this group's value; the running sum then
    # holds each group's value across its whole segment.
    diffs = concatenate((array([values[0]]), values[1:] - values[:-1]))
    temp[self.segments] = diffs
    return cumsum(temp)