Ejemplo n.º 1
0
def _text_cat(a: pa.Array, b: pa.Array) -> pa.Array:
    if len(a) != len(b):
        raise ValueError("Lengths of arrays don't match")

    offsets_a, data_a = _extract_string_buffers(a)
    offsets_b, data_b = _extract_string_buffers(b)
    if len(a) > 0:
        valid = _merge_valid_bitmaps(a, b)
        result_offsets = np.empty(len(a) + 1, dtype=np.int32)
        result_offsets[0] = 0
        total_size = (offsets_a[-1] - offsets_a[0]) + (offsets_b[-1] -
                                                       offsets_b[0])
        result_data = np.empty(total_size, dtype=np.uint8)
        _merge_string_data(
            len(a),
            valid,
            offsets_a,
            data_a,
            offsets_b,
            data_b,
            result_offsets,
            result_data,
        )
        buffers = [
            pa.py_buffer(x) for x in [valid, result_offsets, result_data]
        ]
        return pa.Array.from_buffers(pa.string(), len(a), buffers)
    return a
Ejemplo n.º 2
0
def _apply_binary_str_array(a: pa.Array,
                            b: pa.Array,
                            *,
                            func: Callable,
                            output_dtype,
                            parallel: bool = False):
    out = np.empty(len(a), dtype=output_dtype)

    offsets_buffer_a, data_buffer_a = _extract_string_buffers(a)
    offsets_buffer_b, data_buffer_b = _extract_string_buffers(b)

    if a.null_count == 0 and b.null_count == 0:
        if parallel:
            call = _apply_no_nulls_parallel
        else:
            call = _apply_no_nulls
        call(
            func,
            len(a),
            offsets_buffer_a,
            data_buffer_a,
            offsets_buffer_b,
            data_buffer_b,
            out,
        )
        return pa.array(out)
    else:
        valid = _merge_valid_bitmaps(a, b)
        if parallel:
            call = _apply_with_nulls_parallel
        else:
            call = _apply_with_nulls
        call(
            func,
            len(a),
            valid,
            offsets_buffer_a,
            data_buffer_a,
            offsets_buffer_b,
            data_buffer_b,
            out,
        )
        buffers = [pa.py_buffer(x) for x in [valid, out]]
        return pa.Array.from_buffers(pa.int64(), len(out), buffers)
Ejemplo n.º 3
0
def test_merge_valid_bitmaps():
    a = pa.array([1, 1, 1, 1, 1, 1, 1, 1, 1])
    b = pa.array([1, 1, 1, None, None, None, 1, 1, 1])

    expected = np.array([0xFF, 0x1], dtype=np.uint8)
    result = _merge_valid_bitmaps(a, a)
    npt.assert_array_equal(result, expected)
    expected = np.array([0xC7, 0x1], dtype=np.uint8)
    result = _merge_valid_bitmaps(a, b)
    npt.assert_array_equal(result, expected)

    expected = np.array([0x1], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(8, 1), a.slice(8, 1))
    npt.assert_array_equal(result, expected)

    expected = np.array([0xF], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(0, 4), a.slice(0, 4))
    npt.assert_array_equal(result, expected)
    expected = np.array([0x7], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(0, 4), b.slice(0, 4))
    npt.assert_array_equal(result, expected)

    expected = np.array([0xF], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(5, 4), a.slice(5, 4))
    npt.assert_array_equal(result, expected)
    expected = np.array([0xE], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(5, 4), b.slice(5, 4))
    npt.assert_array_equal(result, expected)

    expected = np.array([0x3], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(5, 2), a.slice(5, 2))
    npt.assert_array_equal(result, expected)
    expected = np.array([0x2], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(5, 2), b.slice(5, 2))
    npt.assert_array_equal(result, expected)

    expected = np.array([0x3], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(5, 2), a.slice(3, 2))
    npt.assert_array_equal(result, expected)
    expected = np.array([0x0], dtype=np.uint8)
    result = _merge_valid_bitmaps(a.slice(5, 2), b.slice(3, 2))
    npt.assert_array_equal(result, expected)