def _1(a: pa.ChunkedArray, b: Any, ops: Dict[str, Callable]):
    """Run ``dispatch_chunked_binary_map`` on ``a`` and ``b`` one chunk at a time.

    How ``b`` is paired with the chunks of ``a`` depends on what it is:

    * ``pa.ChunkedArray``: both inputs are cut into slices described by the
      ``(chunk index, start, length)`` entries that
      ``_combined_in_chunk_offsets`` returns for each of them.
    * scalar (as judged by ``np.isscalar``): handed over unchanged together
      with every chunk of ``a``.
    * anything else: sliced positionally so each piece lines up with one
      chunk of ``a``.

    Parameters
    ----------
    a
        Left-hand chunked input.
    b
        Right-hand input, see above.
    ops
        Passed through untouched to ``dispatch_chunked_binary_map``.

    Returns
    -------
    The per-slice results wrapped with ``pa.chunked_array``.

    Raises
    ------
    ValueError
        If ``b`` is not a scalar and ``len(a) != len(b)``.
    """

    def _cut(source, span):
        # span[0]: chunk index, span[1]: start inside it, span[2]: length.
        begin = span[1]
        return source.chunk(span[0])[begin:begin + span[2]]

    if isinstance(b, pa.ChunkedArray):
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        spans_a, spans_b = _combined_in_chunk_offsets(a, b)
        return pa.chunked_array([
            dispatch_chunked_binary_map(_cut(a, span_a), _cut(b, span_b), ops)
            for span_a, span_b in zip(spans_a, spans_b)
        ])

    if np.isscalar(b):
        return pa.chunked_array([
            dispatch_chunked_binary_map(piece, b, ops)
            for piece in a.iterchunks()
        ])

    if len(a) != len(b):
        raise ValueError("Inputs don't have the same length.")
    starts = _calculate_chunk_offsets(a)
    return pa.chunked_array([
        dispatch_chunked_binary_map(piece, b[start:start + len(piece)], ops)
        for piece, start in zip(a.iterchunks(), starts)
    ])
def _1(a: pa.ChunkedArray, b: Any, op: Callable):
    """Apply a NumPy ufunc where at least one of the arguments is an Arrow structure.

    The work is delegated to ``np_ufunc_op`` one slice at a time:

    * ``b`` is a ``pa.ChunkedArray``: both inputs are cut into slices
      described by the ``(chunk index, start, length)`` entries returned by
      ``_combined_in_chunk_offsets``.
    * ``b`` is a scalar (as judged by ``np.isscalar``): it is passed unchanged
      together with every chunk of ``a``.
    * otherwise: ``b`` is sliced positionally to line up with each chunk of
      ``a``.

    Parameters
    ----------
    a
        Left-hand chunked input.
    b
        Right-hand input, see above.
    op
        Passed through untouched to ``np_ufunc_op``.

    Returns
    -------
    The per-slice results wrapped with ``pa.chunked_array``.

    Raises
    ------
    ValueError
        If ``b`` is not a scalar and ``len(a) != len(b)``.
    """
    if isinstance(b, pa.ChunkedArray):
        # Fail early on a length mismatch instead of pairing unrelated slices.
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        in_a_offsets, in_b_offsets = _combined_in_chunk_offsets(a, b)
        new_chunks: List[pa.Array] = []
        for a_offset, b_offset in zip(in_a_offsets, in_b_offsets):
            a_slice = a.chunk(a_offset[0])[a_offset[1]:a_offset[1] + a_offset[2]]
            b_slice = b.chunk(b_offset[0])[b_offset[1]:b_offset[1] + b_offset[2]]
            new_chunks.append(np_ufunc_op(a_slice, b_slice, op))
        return pa.chunked_array(new_chunks)

    if np.isscalar(b):
        return pa.chunked_array(
            [np_ufunc_op(chunk, b, op) for chunk in a.iterchunks()]
        )

    # Positional slicing below would silently drop a surplus tail of ``b`` or
    # produce short slices when ``b`` is too short, so reject mismatches.
    if len(a) != len(b):
        raise ValueError("Inputs don't have the same length.")
    offsets = _calculate_chunk_offsets(a)
    return pa.chunked_array([
        np_ufunc_op(chunk, b[offset:offset + len(chunk)], op)
        for chunk, offset in zip(a.iterchunks(), offsets)
    ])
def _text_cat_chunked_mixed(a: pa.ChunkedArray, b: pa.Array) -> pa.ChunkedArray:
    """Call ``_text_cat`` on each chunk of ``a`` and the matching slice of ``b``.

    ``b`` is sliced positionally: the slice for a chunk starts at that chunk's
    offset within ``a`` (from ``_calculate_chunk_offsets``) and has the
    chunk's length. The per-chunk results are wrapped with
    ``pa.chunked_array``.
    """
    starts = _calculate_chunk_offsets(a)
    return pa.chunked_array([
        _text_cat(left, b[start:start + len(left)])
        for left, start in zip(a.iterchunks(), starts)
    ])
def _calculate_chunk_offsets(chunked_array: pa.ChunkedArray) -> np.ndarray:
    """Return an array holding the indices pointing to the first element of each chunk.

    Parameters
    ----------
    chunked_array
        Object whose ``iterchunks()`` yields the chunks in order.

    Returns
    -------
    np.ndarray
        One ``int64`` entry per chunk, equal to the summed lengths of all
        chunks before it. The array is empty, but still ``int64``, when
        there are no chunks.
    """
    offsets = []
    position = 0
    for chunk in chunked_array.iterchunks():
        offsets.append(position)
        position += len(chunk)
    # Pin the dtype: ``np.array([])`` would otherwise default to float64,
    # which is not usable as a slice index.
    return np.array(offsets, dtype=np.int64)
def _text_cat_chunked_2(a: pa.Array, b: pa.ChunkedArray) -> pa.ChunkedArray:
    """Call ``_text_cat`` on the matching slice of ``a`` and each chunk of ``b``.

    ``a`` is sliced positionally: the slice for a chunk starts at that chunk's
    offset within ``b`` (from ``_calculate_chunk_offsets``) and has the
    chunk's length. The per-chunk results are wrapped with
    ``pa.chunked_array``.
    """
    starts = _calculate_chunk_offsets(b)
    return pa.chunked_array([
        _text_cat(a[start:start + len(right)], right)
        for right, start in zip(b.iterchunks(), starts)
    ])