コード例 #1
0
ファイル: string.py プロジェクト: simonjayhawkins/fletcher
def _slice_generic(offsets, data, valid_bits, valid_offset, start: int,
                   end: int, step: int) -> StringArrayBuilder:
    """
    Slice every row of an offsets/data encoded string column, general case.

    Unlike the specialised positive-input variants, ``start`` and ``end`` may
    be negative (counted from the end of each row, as in Python slicing) and
    ``step`` may be negative. Slicing works on whole characters, where the
    width of each character is obtained from ``get_utf8_size``.

    Parameters
    ----------
    offsets
        ``offsets[i]`` and ``offsets[i + 1]`` delimit the bytes of row ``i``
        inside ``data``; the column has ``len(offsets) - 1`` rows.
    data
        Byte buffer holding the characters of all rows.
    valid_bits
        Validity bitmap, one bit per row (bit ``k`` of a byte belongs to row
        ``8 * byte + k``). A cleared bit marks a null row. If the bitmap is
        empty every row is treated as valid.
    valid_offset
        Bit position of row 0 inside ``valid_bits``.
    start
        Index of the first character to take.
    end
        Index at which to stop (exclusive), or ``None`` to run to the end of
        the row in the direction of ``step``.
    step
        Stride between taken characters; must not be zero.

    Returns
    -------
    StringArrayBuilder
        Builder with one appended entry (sliced value or null) per row.

    Raises
    ------
    ValueError
        If ``step`` is zero.
    """
    # With a zero step ``char_idx`` would never advance and the selection
    # loop below would spin forever; reject it like Python slicing does.
    if step == 0:
        raise ValueError("slice step cannot be zero")

    builder = StringArrayBuilder(len(offsets) - 1)

    # NOTE(review): entries are appended in iteration order, so this relies
    # on prange visiting rows sequentially - confirm how this is compiled.
    for i in prange(len(offsets) - 1):
        if len(valid_bits) > 0:
            byte_offset = (i + valid_offset) // 8
            bit_offset = (i + valid_offset) % 8
            mask = np.uint8(1 << bit_offset)
            valid = valid_bits[byte_offset] & mask
            if not valid:
                builder.append_null()
                continue

        str_len_bytes = offsets[i + 1] - offsets[i]
        char_bytes: List[bytes] = []
        byte_idx = 0

        # Split the row into one byte slice per character so that characters
        # can be addressed by index in either direction.
        while byte_idx < str_len_bytes:
            char_size = get_utf8_size(data[offsets[i] + byte_idx])
            char_bytes.append(data[offsets[i] + byte_idx:offsets[i] +
                                   byte_idx + char_size])
            byte_idx += char_size

        num_chars = len(char_bytes)
        include_bytes: List[bytes] = []  # type: ignore

        # Negative indices within range are relative to the end of the row;
        # out-of-range values are clamped per direction further down.
        char_idx = start
        if start >= -num_chars and start < 0:
            char_idx += num_chars

        # ``end`` may be None (open-ended slice); only translate real indices.
        true_end = end
        if end is not None and end >= -num_chars and end < 0:
            true_end = end + num_chars

        # Positive step
        if step > 0:
            if char_idx < 0:
                char_idx = 0
            while (end is None
                   or char_idx < true_end) and char_idx < num_chars:
                include_bytes.extend(char_bytes[char_idx])  # type: ignore
                char_idx += step

        # Negative step
        else:
            # Clamp to the last character; from here on char_idx only
            # decreases, so it always stays a valid index inside the loop.
            if char_idx >= num_chars:
                char_idx = num_chars - 1
            while (end is None or char_idx > true_end) and char_idx >= 0:
                include_bytes.extend(char_bytes[char_idx])  # type: ignore
                char_idx += step

        builder.append_value(include_bytes, len(include_bytes))

    return builder
コード例 #2
0
ファイル: string.py プロジェクト: simonjayhawkins/fletcher
def _slice_pos_inputs_step(offsets, data, valid_bits, valid_offset, start: int,
                           end: int, step: int) -> StringArrayBuilder:
    """
    Take every ``step``-th character of each row between ``start`` and ``end``.

    Preconditions: start, end >= 0 (``end`` may also be None, meaning "up to
    the end of the row") and step > 1.
    """
    n_rows = len(offsets) - 1
    builder = StringArrayBuilder(n_rows)

    for i in prange(n_rows):
        # An empty bitmap means no row is null; otherwise a cleared bit
        # marks the row as null.
        if len(valid_bits) > 0:
            bit_pos = i + valid_offset
            if not (valid_bits[bit_pos // 8] & np.uint8(1 << (bit_pos % 8))):
                builder.append_null()
                continue

        row_begin = offsets[i]
        row_nbytes = offsets[i + 1] - row_begin

        # Walk over the first ``start`` characters without keeping them.
        chars_seen = 0
        byte_pos = 0
        while chars_seen < start and byte_pos < row_nbytes:
            byte_pos += get_utf8_size(data[row_begin + byte_pos])
            chars_seen += 1

        # ``countdown`` is zero exactly when the current character is kept;
        # after keeping one it is re-armed so the next ``step - 1`` are dropped.
        countdown = 0
        picked: List[bytes] = []
        while (end is None or chars_seen < end) and byte_pos < row_nbytes:
            width = get_utf8_size(data[row_begin + byte_pos])

            if countdown == 0:
                char_begin = row_begin + byte_pos
                picked.extend(data[char_begin:char_begin + width])
                countdown = step

            countdown -= 1
            byte_pos += width
            chars_seen += 1

        builder.append_value(picked, len(picked))

    return builder
コード例 #3
0
ファイル: string.py プロジェクト: simonjayhawkins/fletcher
def _slice_pos_inputs_nostep(offsets, data, valid_bits, valid_offset,
                             start: int, end: int) -> StringArrayBuilder:
    """
    Take the contiguous character range ``[start, end)`` of each row.

    Preconditions: start, end >= 0 (``end`` may also be None, meaning "up to
    the end of the row") and step == 1.
    """
    n_rows = len(offsets) - 1
    builder = StringArrayBuilder(n_rows)

    for i in prange(n_rows):
        # An empty bitmap means no row is null; otherwise a cleared bit
        # marks the row as null.
        if len(valid_bits) > 0:
            bit_pos = i + valid_offset
            if not (valid_bits[bit_pos // 8] & np.uint8(1 << (bit_pos % 8))):
                builder.append_null()
                continue

        row_begin = offsets[i]
        row_nbytes = offsets[i + 1] - row_begin

        chars_seen = 0
        byte_pos = 0

        # Advance to the byte position of character ``start``.
        while chars_seen < start and byte_pos < row_nbytes:
            byte_pos += get_utf8_size(data[row_begin + byte_pos])
            chars_seen += 1
        first_byte = row_begin + byte_pos

        # Keep advancing to character ``end`` (or to the end of the row).
        while (end is None or chars_seen < end) and byte_pos < row_nbytes:
            byte_pos += get_utf8_size(data[row_begin + byte_pos])
            chars_seen += 1
        last_byte = row_begin + byte_pos

        # The result is a single contiguous byte range of the buffer.
        builder.append_value(data[first_byte:last_byte],
                             last_byte - first_byte)

    return builder