Beispiel #1
0
def _1(a: pa.ChunkedArray, b: Any, ops: Dict[str, Callable]):
    """Run a binary map over ``a`` and ``b`` piece by piece.

    Each piece of ``a`` is paired with the corresponding piece of ``b`` and
    passed, together with ``ops``, to ``dispatch_chunked_binary_map``. The
    per-piece results are assembled with ``pa.chunked_array``.

    Parameters
    ----------
    a
        The chunked left-hand operand.
    b
        The right-hand operand: another ``pa.ChunkedArray``, a scalar (as
        judged by ``np.isscalar``), or any other object that supports
        ``len`` and slicing.
    ops
        Forwarded unchanged to ``dispatch_chunked_binary_map``.

    Raises
    ------
    ValueError
        If ``b`` is not a scalar and ``len(a) != len(b)``.
    """
    if isinstance(b, pa.ChunkedArray):
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        a_spans, b_spans = _combined_in_chunk_offsets(a, b)

        pieces: List[pa.Array] = []
        for a_span, b_span in zip(a_spans, b_spans):
            # Each span is indexed as (chunk index, start in chunk, length).
            a_start = a_span[1]
            b_start = b_span[1]
            left = a.chunk(a_span[0])[a_start:a_start + a_span[2]]
            right = b.chunk(b_span[0])[b_start:b_start + b_span[2]]
            pieces.append(dispatch_chunked_binary_map(left, right, ops))
        return pa.chunked_array(pieces)

    if np.isscalar(b):
        # The same scalar is paired with every chunk of ``a``.
        return pa.chunked_array([
            dispatch_chunked_binary_map(chunk, b, ops)
            for chunk in a.iterchunks()
        ])

    if len(a) != len(b):
        raise ValueError("Inputs don't have the same length.")
    starts = _calculate_chunk_offsets(a)
    # Cut ``b`` into windows that line up with the chunks of ``a``.
    return pa.chunked_array([
        dispatch_chunked_binary_map(chunk, b[start:start + len(chunk)], ops)
        for chunk, start in zip(a.iterchunks(), starts)
    ])
Beispiel #2
0
def _1(a: pa.ChunkedArray, b: Any, op: Callable):
    """Apply a NumPy ufunc where at least one of the arguments is an Arrow structure.

    ``a`` is processed piece by piece: every piece is paired with the
    matching part of ``b`` and handed to ``np_ufunc_op`` together with ``op``.
    The per-piece results are assembled with ``pa.chunked_array``.

    Parameters
    ----------
    a
        The chunked left-hand operand.
    b
        The right-hand operand: another ``pa.ChunkedArray``, a scalar (as
        judged by ``np.isscalar``), or any other object that supports
        ``len`` and slicing.
    op
        Forwarded unchanged to ``np_ufunc_op``.

    Raises
    ------
    ValueError
        If ``b`` is not a scalar and ``len(a) != len(b)``.
    """
    if isinstance(b, pa.ChunkedArray):
        # Fail early instead of pairing up misaligned pieces.
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        in_a_offsets, in_b_offsets = _combined_in_chunk_offsets(a, b)

        new_chunks: List[pa.Array] = []
        for a_offset, b_offset in zip(in_a_offsets, in_b_offsets):
            # Each offset is indexed as (chunk index, start in chunk, length).
            a_start = a_offset[1]
            b_start = b_offset[1]
            a_slice = a.chunk(a_offset[0])[a_start:a_start + a_offset[2]]
            b_slice = b.chunk(b_offset[0])[b_start:b_start + b_offset[2]]
            new_chunks.append(np_ufunc_op(a_slice, b_slice, op))
        return pa.chunked_array(new_chunks)

    if np.isscalar(b):
        # The same scalar is paired with every chunk of ``a``.
        return pa.chunked_array(
            [np_ufunc_op(chunk, b, op) for chunk in a.iterchunks()])

    # Without this check, slicing would silently drop the tail of a longer
    # ``b`` or produce short/empty windows for a shorter one.
    if len(a) != len(b):
        raise ValueError("Inputs don't have the same length.")
    offsets = _calculate_chunk_offsets(a)
    return pa.chunked_array([
        np_ufunc_op(chunk, b[offset:offset + len(chunk)], op)
        for chunk, offset in zip(a.iterchunks(), offsets)
    ])
Beispiel #3
0
def _text_cat_chunked_mixed(a: pa.ChunkedArray,
                            b: pa.Array) -> pa.ChunkedArray:
    """Apply ``_text_cat`` to each chunk of ``a`` and the matching window of ``b``.

    ``b`` is sliced at the start offset of every chunk of ``a`` so that both
    arguments of each ``_text_cat`` call cover the same range of positions.
    The per-chunk results are assembled with ``pa.chunked_array``.
    """
    starts = _calculate_chunk_offsets(a)
    pieces = [
        _text_cat(left, b[start:start + len(left)])
        for left, start in zip(a.iterchunks(), starts)
    ]
    return pa.chunked_array(pieces)
Beispiel #4
0
def _calculate_chunk_offsets(chunked_array: pa.ChunkedArray) -> np.ndarray:
    """Return an array holding the indices pointing to the first element of each chunk.

    The result has one entry per chunk yielded by
    ``chunked_array.iterchunks()``; entry ``i`` is the sum of the lengths of
    all chunks before chunk ``i``. The array always has an integer dtype,
    including when there are no chunks at all.
    """
    offsets = []
    position = 0
    for chunk in chunked_array.iterchunks():
        offsets.append(position)
        position += len(chunk)
    # Pin the dtype: NumPy infers float64 for an empty list, which would make
    # the zero-chunk result unusable as indices.
    return np.array(offsets, dtype=np.int64)
Beispiel #5
0
def _text_cat_chunked_2(a: pa.Array, b: pa.ChunkedArray) -> pa.ChunkedArray:
    """Apply ``_text_cat`` to the matching window of ``a`` and each chunk of ``b``.

    ``a`` is sliced at the start offset of every chunk of ``b`` so that both
    arguments of each ``_text_cat`` call cover the same range of positions.
    The per-chunk results are assembled with ``pa.chunked_array``.
    """
    starts = _calculate_chunk_offsets(b)
    pieces = [
        _text_cat(a[start:start + len(right)], right)
        for right, start in zip(b.iterchunks(), starts)
    ]
    return pa.chunked_array(pieces)