def _replace_with_indices(
    cls,
    chunk: pa.Array,
    indices: npt.NDArray[np.intp],
    value: npt.NDArray[Any],
) -> pa.Array:
    """
    Return ``chunk`` with the elements at ``indices`` overwritten by ``value``.

    Behaves like pyarrow.compute.replace_with_mask, but the positions to
    overwrite are given as integer positions instead of a boolean mask.

    Parameters
    ----------
    chunk : pa.Array
        Array whose elements are to be replaced.
    indices : npt.NDArray[np.intp]
        Positions within ``chunk`` to overwrite.
        NOTE(review): the contiguity check only looks at the first and
        last entry, so this presumably expects sorted, unique positions
        -- confirm against the callers.
    value : npt.NDArray[Any]
        Replacement value(s).

    Returns
    -------
    pa.Array
        ``chunk`` itself when ``indices`` is empty, otherwise the array
        with the replacements applied.
    """
    count = len(indices)
    if count == 0:
        # Nothing to replace; hand the input back untouched.
        return chunk

    first, last = indices[[0, -1]]
    if (last - first) == (count - 1):
        # Fast path for one contiguous run of positions: splice the
        # replacement values between the untouched head and tail.
        candidates = (
            chunk[:first],
            pa.array(value, type=chunk.type, from_pandas=True),
            chunk[last + 1:],
        )
        # Drop zero-length pieces before deciding how to combine them.
        pieces = [piece for piece in candidates if len(piece)]
        if len(pieces) == 1:
            return pieces[0]
        return pa.concat_arrays(pieces)

    # General case: turn the positions into a boolean selection mask.
    selected = np.zeros(len(chunk), dtype=np.bool_)
    selected[indices] = True

    if pa_version_under5p0:
        # Older pyarrow: assign through a numpy round-trip instead of
        # the pyarrow compute functions used below.
        as_numpy = chunk.to_numpy(zero_copy_only=False)
        as_numpy[selected] = value
        return pa.array(as_numpy, type=chunk.type)

    if isna(value).all():
        # Every replacement is missing, so null out the selected slots.
        return pc.if_else(selected, None, chunk)

    return pc.replace_with_mask(chunk, selected, value)
def _set_via_chunk_iteration(
    self, indices: npt.NDArray[np.intp], value: npt.NDArray[Any]
) -> pa.ChunkedArray:
    """
    Set new values chunk by chunk, leaving the chunking layout unchanged.

    Parameters
    ----------
    indices : npt.NDArray[np.intp]
        Positions to overwrite. They are split into per-chunk positions
        by ``self._within_chunk_indices``.
    value : npt.NDArray[Any]
        Replacement values, consumed front to back: each chunk takes as
        many leading values as it has positions to overwrite.
        NOTE(review): this pairing presumably relies on ``indices``
        being sorted -- confirm against the callers.

    Returns
    -------
    pa.ChunkedArray
        New chunked array holding one output chunk per input chunk.
    """
    chunk_indices = self._within_chunk_indices(indices)
    new_data = []

    for i, chunk in enumerate(self._data.iterchunks()):
        c_ind = chunk_indices[i]
        n = len(c_ind)
        # Peel off the leading values that belong to this chunk.
        c_value, value = value[:n], value[n:]

        if n == 1:
            # fast path
            chunk = self._set_single_index_in_chunk(chunk, c_ind[0], c_value[0])
        elif n > 0:
            mask = np.zeros(len(chunk), dtype=np.bool_)
            mask[c_ind] = True
            if pa_version_under5p0:
                # The pyarrow compute functions were added in
                # version 5.0. For prior versions we implement
                # our own by converting to numpy and back.
                values = chunk.to_numpy(zero_copy_only=False)
                values[mask] = c_value
                # Rebuild with the chunk's own type (not a hard-coded
                # string type) so every output chunk keeps the type of
                # its input chunk.
                chunk = pa.array(values, type=chunk.type)
            elif isna(np.array(c_value)).all():
                # Every replacement is missing: null out the masked slots.
                chunk = pc.if_else(mask, None, chunk)
            else:
                chunk = pc.replace_with_mask(chunk, mask, c_value)

        # Chunks with no positions to overwrite pass through unchanged.
        new_data.append(chunk)

    return pa.chunked_array(new_data)