def _text_cat(a: pa.Array, b: pa.Array) -> pa.Array:
    """Combine two equal-length string arrays into one new string array.

    The raw offset/data buffers of both inputs are handed to
    ``_merge_string_data`` together with the merged validity bitmap, and the
    buffers it fills are wrapped as a ``pa.string()`` array.  (Judging by the
    helper names this is a row-wise concatenation; the helper itself is not
    visible here.)

    Parameters
    ----------
    a, b
        Arrays accepted by ``_extract_string_buffers``; they must have the
        same length.

    Returns
    -------
    pa.Array
        A new ``pa.string()`` array of the same length, or ``a`` itself when
        the inputs are empty.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` differ in length.
    """
    n_rows = len(a)
    if n_rows != len(b):
        raise ValueError("Lengths of arrays don't match")

    # Buffers are extracted before the empty-input shortcut so that whatever
    # validation the helper performs still runs for zero-length inputs.
    left_offsets, left_bytes = _extract_string_buffers(a)
    right_offsets, right_bytes = _extract_string_buffers(b)

    if n_rows == 0:
        return a

    validity = _merge_valid_bitmaps(a, b)

    # One offset per row plus the leading zero.
    out_offsets = np.empty(n_rows + 1, dtype=np.int32)
    out_offsets[0] = 0

    # Size the output data buffer by the byte span each input's offsets cover.
    left_size = left_offsets[-1] - left_offsets[0]
    right_size = right_offsets[-1] - right_offsets[0]
    out_bytes = np.empty(left_size + right_size, dtype=np.uint8)

    _merge_string_data(
        n_rows,
        validity,
        left_offsets,
        left_bytes,
        right_offsets,
        right_bytes,
        out_offsets,
        out_bytes,
    )

    # Buffer order for a string array: validity bitmap, offsets, data.
    arrow_buffers = [
        pa.py_buffer(validity),
        pa.py_buffer(out_offsets),
        pa.py_buffer(out_bytes),
    ]
    return pa.Array.from_buffers(pa.string(), n_rows, arrow_buffers)
def _apply_binary_str_array(
    a: pa.Array,
    b: pa.Array,
    *,
    func: Callable,
    output_dtype,
    parallel: bool = False,
):
    """Apply a binary kernel row by row to two string arrays.

    The raw offset/data buffers of ``a`` and ``b`` are passed to one of the
    ``_apply_*`` drivers together with ``func`` and a preallocated output
    array of ``output_dtype``.

    Parameters
    ----------
    a, b
        Arrays accepted by ``_extract_string_buffers``; they must have the
        same length.
    func
        Kernel forwarded unchanged to the selected ``_apply_*`` driver.
    output_dtype
        NumPy dtype of the result values.
    parallel
        Select the ``*_parallel`` variant of the driver.

    Returns
    -------
    pa.Array
        Array of ``len(a)`` values whose Arrow type is inferred from
        ``output_dtype``.  When either input contains nulls, the result
        carries the validity bitmap produced by ``_merge_valid_bitmaps``.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` differ in length.
    """
    # The drivers index both inputs with len(a); reject mismatched lengths up
    # front (same check and message as `_text_cat`).
    if len(a) != len(b):
        raise ValueError("Lengths of arrays don't match")

    out = np.empty(len(a), dtype=output_dtype)

    offsets_buffer_a, data_buffer_a = _extract_string_buffers(a)
    offsets_buffer_b, data_buffer_b = _extract_string_buffers(b)

    if a.null_count == 0 and b.null_count == 0:
        call = _apply_no_nulls_parallel if parallel else _apply_no_nulls
        call(
            func,
            len(a),
            offsets_buffer_a,
            data_buffer_a,
            offsets_buffer_b,
            data_buffer_b,
            out,
        )
        return pa.array(out)

    valid = _merge_valid_bitmaps(a, b)
    call = _apply_with_nulls_parallel if parallel else _apply_with_nulls
    call(
        func,
        len(a),
        valid,
        offsets_buffer_a,
        data_buffer_a,
        offsets_buffer_b,
        data_buffer_b,
        out,
    )

    # Let pyarrow derive the Arrow type and value layout from `out` (as the
    # no-null branch does) rather than assuming int64: this honours
    # `output_dtype` and also handles bool, which Arrow stores bit-packed.
    # Then swap in the merged validity bitmap for the absent one.
    values = pa.array(out)
    buffers = [pa.py_buffer(valid), *values.buffers()[1:]]
    return pa.Array.from_buffers(values.type, len(values), buffers)
def test_merge_valid_bitmaps():
    """Check ``_merge_valid_bitmaps`` on whole and sliced arrays.

    ``all_valid`` has nine non-null rows; ``with_nulls`` has nulls at rows
    3, 4 and 5.  The expected bitmaps are little-endian per byte (row 0 is
    the lowest bit), covering byte-aligned input, slices with non-zero
    offsets, and pairs of slices whose offsets differ.
    """
    all_valid = pa.array([1, 1, 1, 1, 1, 1, 1, 1, 1])
    with_nulls = pa.array([1, 1, 1, None, None, None, 1, 1, 1])

    # (left operand, right operand, expected bitmap bytes)
    cases = [
        # Full nine-row arrays: the bitmap spills into a second byte.
        (all_valid, all_valid, [0xFF, 0x1]),
        (all_valid, with_nulls, [0xC7, 0x1]),
        # Single row taken from the start of the second bitmap byte.
        (all_valid.slice(8, 1), all_valid.slice(8, 1), [0x1]),
        # Slices starting at offset 0.
        (all_valid.slice(0, 4), all_valid.slice(0, 4), [0xF]),
        (all_valid.slice(0, 4), with_nulls.slice(0, 4), [0x7]),
        # Slices starting mid-byte and crossing the byte boundary.
        (all_valid.slice(5, 4), all_valid.slice(5, 4), [0xF]),
        (all_valid.slice(5, 4), with_nulls.slice(5, 4), [0xE]),
        # Short slices starting mid-byte.
        (all_valid.slice(5, 2), all_valid.slice(5, 2), [0x3]),
        (all_valid.slice(5, 2), with_nulls.slice(5, 2), [0x2]),
        # Operands sliced at different offsets.
        (all_valid.slice(5, 2), all_valid.slice(3, 2), [0x3]),
        (all_valid.slice(5, 2), with_nulls.slice(3, 2), [0x0]),
    ]

    for left, right, expected_bytes in cases:
        expected = np.array(expected_bytes, dtype=np.uint8)
        result = _merge_valid_bitmaps(left, right)
        npt.assert_array_equal(result, expected)