Example #1
0
 def sdc_array_equal_str_arr_impl(A, B):
     """Return True when string arrays A and B are equal element by element.

     The arrays are equal when they have the same number of elements, the
     same total number of characters, and every position holds equal strings
     with matching NA flags.
     """
     # Check sizes before touching any element: the loop below walks len(A)
     # positions and indexes B with the same counter, which is only valid
     # when both arrays have the same number of elements.
     if len(A) != len(B) or num_total_chars(A) != num_total_chars(B):
         return False

     for i in numpy.arange(len(A)):
         if (A[i] != B[i]
                 or str_arr_is_na(A, i) is not str_arr_is_na(B, i)):
             return False
     return True
Example #2
0
        def sdc_check_indexes_equal_string_impl(A, B):
            """Return True when string indexes A and B are equal element by element.

            The indexes are equal when they have the same number of elements,
            the same total number of characters, and every position holds
            equal strings with matching NA flags.
            """
            # TODO: replace with StringArrays comparison
            # Check sizes before touching any element: the loop below walks
            # len(A) positions and indexes B with the same counter, which is
            # only valid when both arrays have the same number of elements.
            if len(A) != len(B) or num_total_chars(A) != num_total_chars(B):
                return False

            for i in numpy.arange(len(A)):
                if (A[i] != B[i]
                        or str_arr_is_na(A, i) is not str_arr_is_na(B, i)):
                    return False

            return True
Example #3
0
            def _append_single_string_array_impl(A, B):
                """Build a new string array holding the elements of A followed by B."""
                # Size the destination for the combined element and character counts.
                combined = sdc.str_arr_ext.pre_alloc_string_array(
                    len(A) + len(B),
                    num_total_chars(A) + num_total_chars(B))

                # Copy A at the front; its return value is where B starts.
                write_at = append_string_array_to(combined, 0, A)
                append_string_array_to(combined, write_at, B)

                return combined
Example #4
0
        def getitem_str_impl(arr, slice_index, start, count):
            """Return the first ``slice_index.stop`` strings of a distributed string array.

            Each rank trims its local chunk to the part that lies before the
            slice end, the chunks are gathered, the character counts are
            reduced across ranks, and the result is broadcast so that every
            rank returns an array.

            NOTE(review): ``start`` and ``count`` are presumably this rank's
            global start offset and local element count -- confirm against the
            caller. ``reduce_op`` is taken from the enclosing scope.
            """
            rank = sdc.distributed_api.get_rank()
            k = slice_index.stop

            # get total characters for allocation
            # NOTE(review): this initial value is overwritten below before it is
            # read; it may only exist to fix the variable's type -- confirm.
            n_chars = np.uint64(0)
            if k > start:
                # if slice end is beyond the start of this subset we have to send our elements
                my_end = min(count, k - start)
                my_arr = arr[:my_end]
            else:
                # slice ends before this rank's chunk: contribute an empty array
                my_arr = arr[:0]

            # get the total number of chars in our array, then gather all arrays into one
            # and compute total number of chars in all arrays
            n_chars = num_total_chars(my_arr)
            my_arr = sdc.distributed_api.gatherv(my_arr)
            n_chars = sdc.distributed_api.dist_reduce(n_chars,
                                                      np.int32(reduce_op))

            # Ranks other than 0 allocate a receive buffer of k strings and
            # n_chars characters; rank 0 uses the gathered array directly.
            if rank != 0:
                out_arr = pre_alloc_string_array(k, n_chars)
            else:
                out_arr = my_arr

            # actual communication
            sdc.distributed_api.bcast(out_arr)
            return out_arr
Example #5
0
 def prealloc_impl(arr):
     """Return ``arr`` on the root rank and a same-sized empty string array elsewhere.

     The element count and the total character count are both passed
     through ``bcast_scalar``; non-root ranks allocate from the results.
     """
     my_rank = sdc.distributed_api.get_rank()
     # Both scalar broadcasts run on every rank, in this order.
     n_strings = bcast_scalar(len(arr))
     n_chars = bcast_scalar(np.int64(num_total_chars(arr)))
     if my_rank == MPI_ROOT:
         return arr
     return pre_alloc_string_array(n_strings, n_chars)
Example #6
0
            def _append_list_string_array_impl(A, B):
                """Build a new string array holding A followed by every array in B."""
                # A first, then the arrays of B in their original order.
                parts = [A] + list(B)

                # Size the destination from the summed lengths and character counts.
                n_elements = numpy.array([len(part) for part in parts]).sum()
                n_chars = numpy.array([num_total_chars(part) for part in parts]).sum()
                merged = sdc.str_arr_ext.pre_alloc_string_array(n_elements, n_chars)

                # Copy each part in turn; every call reports how far to advance.
                write_at = 0
                for part in parts:
                    write_at += append_string_array_to(merged, write_at, part)

                return merged
Example #7
0
        def gatherv_str_arr_impl(data):
            """Gather the local string arrays of all ranks into one array on the root.

            Per-string lengths and raw character data are sent with two
            separate ``c_gatherv`` calls. The received lengths land in the
            result's offset buffer and are then converted to offsets in place.
            Only the root rank (``MPI_ROOT``) allocates a full-size result;
            other ranks return the dummy ``StringArray([''])``.
            """
            rank = sdc.distributed_api.get_rank()
            n_loc = len(data)
            n_all_chars = num_total_chars(data)

            # allocate send lens arrays
            send_arr_lens = np.empty(n_loc, np.uint32)  # XXX offset type is uint32
            send_data_ptr = get_data_ptr(data)

            # record the length of every local string
            for i in range(n_loc):
                _str = data[i]
                send_arr_lens[i] = len(_str)

            # Per-rank element and character counts, and their totals.
            # NOTE(review): both counts are cast to int32 here, so larger
            # values would not fit -- confirm callers stay within that range.
            recv_counts = gather_scalar(np.int32(n_loc))
            recv_counts_char = gather_scalar(np.int32(n_all_chars))
            n_total = recv_counts.sum()
            n_total_char = recv_counts_char.sum()

            # displacements
            all_data = StringArray([''])  # dummy arrays on non-root PEs
            displs = np.empty(0, np.int32)
            displs_char = np.empty(0, np.int32)

            # only the root allocates the real receive buffers and displacements
            if rank == MPI_ROOT:
                all_data = pre_alloc_string_array(n_total, n_total_char)
                displs = sdc.hiframes.join.calc_disp(recv_counts)
                displs_char = sdc.hiframes.join.calc_disp(recv_counts_char)

            offset_ptr = get_offset_ptr(all_data)
            data_ptr = get_data_ptr(all_data)
            # first gather: string lengths, received into the offset buffer
            c_gatherv(
                send_arr_lens.ctypes,
                np.int32(n_loc),
                offset_ptr,
                recv_counts.ctypes,
                displs.ctypes,
                int32_typ_enum)
            # second gather: raw characters, received into the data buffer
            c_gatherv(
                send_data_ptr,
                np.int32(n_all_chars),
                data_ptr,
                recv_counts_char.ctypes,
                displs_char.ctypes,
                char_typ_enum)
            # turn the gathered lengths into offsets in place
            convert_len_arr_to_offset(offset_ptr, n_total)
            return all_data
Example #8
0
def ensure_capacity_str(arr, new_size, n_chars):
    """Return a string array able to hold ``new_size`` items plus ``n_chars`` more chars.

    ``new_size`` is the element count right after the write index, so the
    element about to be written sits at index ``new_size - 1``. ``n_chars``
    is the number of characters that element needs. When ``arr`` already has
    room it is returned unchanged; otherwise a larger array is allocated,
    the first ``new_size - 1`` elements are copied over, and the new array
    is returned.
    """
    new_arr = arr
    curr_len = len(arr)
    curr_num_chars = num_total_chars(arr)
    # characters used before the write index plus those of the new element
    needed_total_chars = getitem_str_offset(arr, new_size - 1) + n_chars

    # TODO: corner case test
    if curr_len < new_size or needed_total_chars > curr_num_chars:
        # Grow by doubling, but never to less than what is required:
        # doubling alone is not enough when curr_len is 0 or when new_size
        # exceeds twice the current length.
        new_len = int(
            max(2 * curr_len, new_size) if curr_len < new_size else curr_len)
        # Doubling plus n_chars always covers needed_total_chars, because the
        # offset at the write index cannot exceed curr_num_chars.
        new_num_chars = int(
            (2 * curr_num_chars + n_chars)
            if needed_total_chars > curr_num_chars else curr_num_chars)
        new_arr = pre_alloc_string_array(new_len, new_num_chars)
        copy_str_arr_slice(new_arr, arr, new_size - 1)

    return new_arr
Example #9
0
        def bcast_str_impl(data):
            """Broadcast the contents of a string array from the root rank to all ranks.

            The root sends per-string lengths followed by the raw characters.
            Other ranks receive the lengths into their own offset buffer,
            receive the characters into their data buffer, and then convert
            the lengths to offsets in place.

            NOTE(review): the element and character counts are taken from the
            local ``data`` on every rank, so non-root ranks presumably must
            already hold an array of matching size -- confirm against callers.
            """
            rank = sdc.distributed_api.get_rank()
            n_loc = len(data)
            n_all_chars = num_total_chars(data)
            # both counts are cast to int32 for the c_bcast calls below
            assert n_loc < INT_MAX
            assert n_all_chars < INT_MAX

            offset_ptr = get_offset_ptr(data)
            data_ptr = get_data_ptr(data)

            if rank == MPI_ROOT:
                # root builds a temporary array of string lengths to send
                send_arr_lens = np.empty(n_loc, np.uint32)  # XXX offset type is uint32
                for i in range(n_loc):
                    _str = data[i]
                    send_arr_lens[i] = len(_str)

                c_bcast(send_arr_lens.ctypes, np.int32(n_loc), int32_typ_enum)
            else:
                # other ranks receive the lengths directly into the offset buffer
                c_bcast(offset_ptr, np.int32(n_loc), int32_typ_enum)

            # broadcast the raw character data
            c_bcast(data_ptr, np.int32(n_all_chars), char_typ_enum)
            if rank != MPI_ROOT:
                # received values are lengths; turn them into offsets in place
                convert_len_arr_to_offset(offset_ptr, n_loc)
Example #10
0
 def empty_like_type_str_arr(n, arr):
     """Pre-allocate a string array of ``n`` items sized after ``arr``.

     The character capacity is ``n`` times the average string length of
     ``arr`` (integer division), or ``n`` times 20 when ``arr`` is empty.
     """
     n_existing = len(arr)
     # Fall back to 20 characters per string when there is nothing to sample.
     chars_per_str = num_total_chars(arr) // n_existing if n_existing != 0 else 20
     return pre_alloc_string_array(n, n * chars_per_str)