コード例 #1
0
def first(*, array: pa.Array, group_splits: np.array, **kwargs) -> pa.Array:
    """Pick the leading non-null value of every group in ``array``.

    Null entries are filtered out of ``array`` first. Each group then
    contributes the value found at the start of its segment; a group whose
    segment is empty contributes null instead.

    ``group_splits`` is handed to ``nonnull_group_splits()`` unchanged.
    NOTE(review): this assumes that helper returns the group boundaries
    re-based onto the null-filtered values -- confirm against its
    definition. Extra keyword arguments are accepted and ignored.
    """
    splits = nonnull_group_splits(array, group_splits)
    valid_only = array.filter(array.is_valid())

    # Segment i spans valid_only[lower[i]:upper[i]].
    lower = np.insert(splits, 0, 0)
    upper = np.append(splits, len(valid_only))

    # An empty segment has no first element. Its start index may even lie
    # past the end of valid_only, so mask it: take() maps a null index to a
    # null result instead of reading that position.
    is_empty = lower == upper
    take_at = pa.array(lower, pa.int64(), mask=is_empty)
    return valid_only.take(take_at)
コード例 #2
0
def nunique(*, array: pa.Array, group_splits: np.array, **kwargs) -> pa.Array:
    """Count the distinct non-null values in every group of ``array``.

    Nulls are filtered out, the remaining values are converted to a NumPy
    array and cut into one segment per group, and ``np.unique`` is applied
    to each segment. The result holds one int64 count per segment.

    ``group_splits`` is handed to ``nonnull_group_splits()`` unchanged.
    NOTE(review): this assumes that helper returns the group boundaries
    re-based onto the null-filtered values -- confirm against its
    definition. Extra keyword arguments are accepted and ignored.
    """
    splits = nonnull_group_splits(array, group_splits)
    valid_mask = array.is_valid()
    values = array.filter(valid_mask).to_numpy(zero_copy_only=False)

    # np.split yields len(splits) + 1 segments, one per group.
    segments = np.split(values, splits)
    distinct_counts = np.fromiter(
        (np.unique(segment).size for segment in segments),
        dtype=np.int64,
        count=len(segments),
    )
    return pa.array(distinct_counts)
コード例 #3
0
 def ufunc_caller(*, array: pa.Array, group_splits: np.array,
                  **kwargs) -> pa.Array:
     """Run the enclosing scope's ``call_ufunc`` over the non-null values.

     ``force_otype`` and ``call_ufunc`` are free variables supplied by the
     surrounding scope, which is not part of this block.

     Nulls are filtered out of ``array`` and the rest is converted to a
     NumPy array. ``call_ufunc`` receives those values, the splits from
     ``nonnull_group_splits()``, an output dtype and a "zero" value, and
     must return ``(result, mask)``; the mask marks which result entries
     become null in the returned Arrow array.

     NOTE(review): "zero" is presumably the placeholder ``call_ufunc``
     writes for groups without any non-null value -- confirm against the
     ``call_ufunc`` implementations. Extra keyword arguments are ignored.
     """
     splits = nonnull_group_splits(array, group_splits)
     valid_mask = array.is_valid()
     values = array.filter(valid_mask).to_numpy(zero_copy_only=False)

     # A truthy force_otype overrides the dtype of the converted values.
     otype = force_otype if force_otype else values.dtype

     # Unicode arrays use the empty string; every other type uses the
     # default-constructed scalar of the output dtype.
     zero = "" if pa.types.is_unicode(array.type) else otype.type()

     np_result, np_empty_indices = call_ufunc(values, splits, otype, zero)
     return pa.array(np_result, mask=np_empty_indices)