Esempio n. 1
0
        def getitem_str_impl(arr, slice_index, start, count):
            """Return the strings below ``slice_index.stop`` on every rank.

            Each rank selects the leading part of its local ``arr`` that lies
            below the slice stop, using ``start`` as the offset of its chunk
            and ``count`` as the cap on how many local elements to take. The
            selected pieces are passed through ``gatherv``, the character
            counts are combined with ``dist_reduce``, and the array held by
            rank 0 is then passed to ``bcast``.
            """
            stop = slice_index.stop
            my_rank = hpat.distributed_api.get_rank()

            # initial value only; overwritten once the local piece is known
            n_chars = np.uint64(0)
            if stop <= start:
                # the slice ends before this chunk begins: contribute nothing
                local_part = arr[:0]
            else:
                # the slice reaches into this chunk: contribute its leading elements
                local_part = arr[:min(count, stop - start)]

            # count local characters, gather the pieces, then reduce the counts
            # so every rank knows how much character storage to allocate
            n_chars = num_total_chars(local_part)
            gathered = hpat.distributed_api.gatherv(local_part)
            n_chars = hpat.distributed_api.dist_reduce(n_chars,
                                                       np.int32(reduce_op))

            if my_rank == 0:
                # rank 0 uses the gathered array directly
                out_arr = gathered
            else:
                # other ranks allocate a buffer for the broadcast
                out_arr = pre_alloc_string_array(stop, n_chars)

            hpat.distributed_api.bcast(out_arr)
            return out_arr
Esempio n. 2
0
        def getitem_str_impl(arr, slice_index, start, count):
            """Return the strings below ``slice_index.stop`` on every rank.

            Two paths are taken depending on whether the slice stop exceeds
            ``count``:

            * ``k > count``: every rank selects a leading piece of its local
              ``arr``, the pieces go through ``gatherv`` and the character
              counts through ``dist_reduce``.
            * otherwise: rank 0 slices ``arr[:k]`` itself and shares only the
              character count via ``bcast_scalar``.

            In both paths ranks other than 0 allocate a receive buffer of
            ``k`` strings and the result is passed to ``bcast``.
            """
            rank = hpat.distributed_api.get_rank()
            k = slice_index.stop
            # get total characters for allocation
            # (also provides the value non-zero ranks hand to bcast_scalar below)
            n_chars = np.uint64(0)
            # NOTE(review): the two branches issue different collectives; this
            # presumably relies on `k > count` evaluating the same on all
            # ranks -- confirm against how `count` is produced by the caller.
            if k > count:
                # number of local elements below the stop: clamped to [0, count]
                my_end = min(count, max(k-start, 0))
                my_arr = arr[:my_end]
                my_arr = hpat.distributed_api.gatherv(my_arr)
                # the count is taken from the array returned by gatherv, then
                # reduced across ranks
                n_chars = hpat.distributed_api.dist_reduce(
                    num_total_chars(my_arr), np.int32(reduce_op))
                if rank == 0:
                    out_arr = my_arr
            else:
                # rank 0 takes the first k strings of its own local array
                if rank == 0:
                    my_arr = arr[:k]
                    n_chars = num_total_chars(my_arr)
                    out_arr = my_arr
                # called on every rank; non-zero ranks pass the initial 0
                n_chars = bcast_scalar(n_chars)
            if rank != 0:
                # receive buffer: k strings, n_chars characters
                out_arr = pre_alloc_string_array(k, n_chars)

            # actual communication
            hpat.distributed_api.bcast(out_arr)
            return out_arr
Esempio n. 3
0
 def prealloc_impl(arr):
     """Return ``arr`` on the root rank and a same-sized empty array elsewhere.

     The element count and the total character count of ``arr`` are passed
     through ``bcast_scalar``; every rank other than ``MPI_ROOT`` then
     allocates a string array with the returned sizes.
     """
     my_rank = hpat.distributed_api.get_rank()
     # both scalars are broadcast on every rank, in this order
     n_strs = bcast_scalar(len(arr))
     total_chars = bcast_scalar(np.int64(num_total_chars(arr)))
     if my_rank == MPI_ROOT:
         return arr
     return pre_alloc_string_array(n_strs, total_chars)
Esempio n. 4
0
    def finalize_str_impl(arr, shuffle_meta, is_contig):
        """Fill in the receive-side fields of ``shuffle_meta`` for a string shuffle.

        Exchanges the per-rank element and character counts with
        ``alltoall``, allocates the output string array from the received
        totals, and computes the send/receive displacements for both
        elements and characters. When ``is_contig`` is false, send-side
        buffers for the string lengths and characters are allocated as well.
        ``arr`` is not read here.
        """
        dist = hpat.distributed_api

        # exchange element counts first, then character counts
        dist.alltoall(
            shuffle_meta.send_counts, shuffle_meta.recv_counts, 1)
        dist.alltoall(
            shuffle_meta.send_counts_char, shuffle_meta.recv_counts_char, 1)

        # size the output array from what this rank will receive
        shuffle_meta.n_out = shuffle_meta.recv_counts.sum()
        recv_chars_total = shuffle_meta.recv_counts_char.sum()
        shuffle_meta.out_arr = pre_alloc_string_array(
            shuffle_meta.n_out, recv_chars_total)

        # displacements derived from each counts array
        shuffle_meta.send_disp = hpat.hiframes_join.calc_disp(
            shuffle_meta.send_counts
        )
        shuffle_meta.recv_disp = hpat.hiframes_join.calc_disp(
            shuffle_meta.recv_counts
        )
        shuffle_meta.send_disp_char = hpat.hiframes_join.calc_disp(
            shuffle_meta.send_counts_char
        )
        shuffle_meta.recv_disp_char = hpat.hiframes_join.calc_disp(
            shuffle_meta.recv_counts_char
        )

        if not is_contig:
            # non-contiguous data needs explicit send buffers
            shuffle_meta.n_send = shuffle_meta.send_counts.sum()
            shuffle_meta.send_arr_lens = np.empty(
                shuffle_meta.n_send, np.uint32)
            send_chars_total = shuffle_meta.send_counts_char.sum()
            shuffle_meta.send_arr_chars_arr = np.empty(
                send_chars_total, np.uint8)
            shuffle_meta.send_arr_chars = get_ctypes_ptr(
                shuffle_meta.send_arr_chars_arr.ctypes)
Esempio n. 5
0
 def set_string_to_array(A):
     """Copy the contents of ``A`` into a newly allocated string array.

     ``A`` is presumably a set of strings (judging by the helper names); its
     length and total character count size the allocation before the helper
     fills it.
     """
     # TODO: support unicode
     n_strs = len(A)
     n_chars = num_total_chars_set_string(A)
     out_arr = pre_alloc_string_array(n_strs, n_chars)
     populate_str_arr_from_set(A, out_arr)
     return out_arr
Esempio n. 6
0
def _str_replace_noregex_impl(str_arr, pat, val):
    """Replace ``pat`` with ``val`` in every string and wrap the result in a Series.

    Runs in two passes because the output array is allocated with its final
    size: the first pass performs the replacements and sums
    ``get_utf8_size`` of each result, the second copies the results into the
    pre-allocated array.
    """
    numba.parfor.init_prange()
    n_strs = len(str_arr)
    replaced = hpat.str_ext.alloc_str_list(n_strs)
    utf8_total = 0
    # pass 1: do the replacement and measure the output
    for idx in numba.parfor.internal_prange(n_strs):
        new_str = str_arr[idx].replace(pat, val)
        replaced[idx] = new_str
        utf8_total += get_utf8_size(new_str)

    numba.parfor.init_prange()
    result = pre_alloc_string_array(n_strs, utf8_total)
    # pass 2: move the strings into the exactly-sized array
    for idx in numba.parfor.internal_prange(n_strs):
        result[idx] = replaced[idx]
    return hpat.hiframes.api.init_series(result)
Esempio n. 7
0
def ensure_capacity_str(arr, new_size, n_chars):
    """Return a string array that can take a write ending at ``new_size``.

    ``new_size`` is one past the index about to be written and ``n_chars`` is
    the number of characters that write needs. ``arr`` itself is returned when
    it already has enough slots and character storage. Otherwise a larger
    array is allocated and the first ``new_size - 1`` strings are copied over.

    Both capacities are doubled on growth, but never to less than the pending
    write requires. Plain doubling leaves an array of length 0 (or with no
    character storage) at size 0 forever, and cannot fit a single string that
    is longer than the current character buffer.
    """
    new_arr = arr
    curr_len = len(arr)
    curr_num_chars = num_total_chars(arr)

    # TODO: corner case test
    # `or` short-circuits: the offset is only compared when slots suffice
    if curr_len < new_size or getitem_str_offset(arr, new_size - 1) + n_chars > curr_num_chars:
        # characters in use once the pending string has been written
        needed_chars = int(getitem_str_offset(arr, new_size - 1) + n_chars)

        # double, then clamp up to the minimum the write needs; int() keeps
        # the sizes plain integers whatever the operand types are
        new_len = int(2 * curr_len)
        if new_len < new_size:
            new_len = int(new_size)

        new_num_chars = int(2 * curr_num_chars)
        if new_num_chars < needed_chars:
            new_num_chars = needed_chars

        new_arr = pre_alloc_string_array(new_len, new_num_chars)
        copy_str_arr_slice(new_arr, arr, new_size - 1)

    return new_arr
Esempio n. 8
0
        def gatherv_str_arr_impl(data):
            """Gather the local string arrays of all ranks into one array on the root.

            Two ``c_gatherv`` calls are issued: one for the per-string lengths
            (written into the offset buffer of the result) and one for the raw
            characters. The gathered lengths are then turned into offsets by
            ``convert_len_arr_to_offset``. Ranks other than ``MPI_ROOT`` return
            the dummy one-element array created below.
            """
            rank = hpat.distributed_api.get_rank()
            n_loc = len(data)
            n_all_chars = num_total_chars(data)

            # allocate send lens arrays
            send_arr_lens = np.empty(n_loc, np.uint32)  # XXX offset type is uint32
            send_data_ptr = get_data_ptr(data)

            # per-string lengths to send
            # NOTE(review): len() is used here while the character count above
            # comes from num_total_chars -- confirm both use the same unit for
            # non-ASCII strings.
            for i in range(n_loc):
                _str = data[i]
                send_arr_lens[i] = len(_str)

            # collect every rank's element count and character count
            recv_counts = gather_scalar(np.int32(n_loc))
            recv_counts_char = gather_scalar(np.int32(n_all_chars))
            n_total = recv_counts.sum()
            n_total_char = recv_counts_char.sum()

            # placeholders; replaced with real buffers and displacements on root
            all_data = StringArray([''])  # dummy arrays on non-root PEs
            displs = np.empty(0, np.int32)
            displs_char = np.empty(0, np.int32)

            if rank == MPI_ROOT:
                all_data = pre_alloc_string_array(n_total, n_total_char)
                displs = hpat.hiframes.join.calc_disp(recv_counts)
                displs_char = hpat.hiframes.join.calc_disp(recv_counts_char)

            offset_ptr = get_offset_ptr(all_data)
            data_ptr = get_data_ptr(all_data)
            # gather string lengths into the offset buffer of the result
            # (lengths are held as uint32 but sent with int32_typ_enum)
            c_gatherv(
                send_arr_lens.ctypes,
                np.int32(n_loc),
                offset_ptr,
                recv_counts.ctypes,
                displs.ctypes,
                int32_typ_enum)
            # gather the raw characters into the data buffer of the result
            c_gatherv(
                send_data_ptr,
                np.int32(n_all_chars),
                data_ptr,
                recv_counts_char.ctypes,
                displs_char.ctypes,
                char_typ_enum)
            # NOTE(review): this also runs on non-root ranks, where all_data is
            # the dummy array -- confirm n_total is safe for it there.
            convert_len_arr_to_offset(offset_ptr, n_total)
            return all_data
Esempio n. 9
0
def _str_replace_regex_impl(str_arr, pat, val):
    """Regex-replace ``pat`` with ``val`` in every string and wrap the result in a Series.

    The pattern is compiled once up front. A first pass performs the
    replacements through the std-string helpers and sums the result lengths;
    a second pass copies the results into an array allocated with that total.
    """
    numba.parfor.init_prange()
    regex = hpat.str_ext.compile_regex(unicode_to_std_str(pat))
    std_val = unicode_to_std_str(val)
    n_strs = len(str_arr)
    replaced = hpat.str_ext.alloc_str_list(n_strs)
    total_len = 0
    # pass 1: replace and measure
    for idx in numba.parfor.internal_prange(n_strs):
        # TODO: support unicode
        std_in = unicode_to_std_str(str_arr[idx])
        std_out = hpat.str_ext.str_replace_regex(std_in, regex, std_val)
        new_str = std_str_to_unicode(std_out)
        replaced[idx] = new_str
        # NOTE(review): sized with len() rather than a UTF-8 byte count --
        # confirm this is sufficient once non-ASCII output is supported.
        total_len += len(new_str)

    numba.parfor.init_prange()
    result = pre_alloc_string_array(n_strs, total_len)
    # pass 2: move the strings into the exactly-sized array
    for idx in numba.parfor.internal_prange(n_strs):
        result[idx] = replaced[idx]
    return hpat.hiframes.api.init_series(result)
Esempio n. 10
0
 def trim_arr_str(arr, size):
     """Return a new string array holding the first ``size`` strings of ``arr``.

     The character storage of the new array is sized from the offset stored at
     index ``size`` of ``arr`` (presumably where the kept strings end).
     """
     kept_chars = np.int64(getitem_str_offset(arr, size))
     trimmed = pre_alloc_string_array(size, kept_chars)
     copy_str_arr_slice(trimmed, arr, size)
     return trimmed
Esempio n. 11
0
 def empty_like_type_str_arr(n, arr):
     """Allocate a string array of ``n`` elements sized after ``arr``.

     Character storage is estimated as ``n`` times the average number of
     characters per string in ``arr``; when ``arr`` is empty a fixed guess of
     20 characters per string is used instead.
     """
     n_existing = len(arr)
     if n_existing == 0:
         chars_per_str = 20  # heuristic when there is nothing to average
     else:
         chars_per_str = num_total_chars(arr) // n_existing
     return pre_alloc_string_array(n, n * chars_per_str)
Esempio n. 12
0
 def set_string_to_array(str_set):
     """Copy the strings of ``str_set`` into a newly allocated string array.

     The allocation is sized from the number of strings in the set and their
     total character count, then filled by ``populate_str_arr_from_set``.
     """
     n_strs = len(str_set)
     n_chars = num_total_chars_set_string(str_set)
     out_arr = pre_alloc_string_array(n_strs, n_chars)
     populate_str_arr_from_set(str_set, out_arr)
     return out_arr