def gatherv_str_arr_impl(data):
    """Gather the local chunks of a distributed string array onto MPI_ROOT.

    Two ``c_gatherv`` calls are issued: the first collects the per-string
    lengths into the offset buffer of the result array, the second collects
    the raw characters into its data buffer. ``convert_len_arr_to_offset``
    is then applied to the offset buffer.

    Returns the array allocated on MPI_ROOT; on every other rank the
    returned object is the one-element placeholder array.
    """
    my_rank = sdc.distributed_api.get_rank()
    local_count = len(data)
    local_char_count = num_total_chars(data)

    # Length of every local string; this is the send buffer of the first
    # gather.
    local_lens = np.empty(local_count, np.uint32)  # XXX offset type is uint32
    local_chars_ptr = get_data_ptr(data)
    for idx in range(local_count):
        local_lens[idx] = len(data[idx])

    # Per-rank element and character counts, and their totals.
    counts_per_rank = gather_scalar(np.int32(local_count))
    chars_per_rank = gather_scalar(np.int32(local_char_count))
    total_count = counts_per_rank.sum()
    total_char_count = chars_per_rank.sum()

    # Placeholder receive buffers and displacements for non-root PEs; the
    # root replaces them with properly sized ones below.
    gathered = StringArray([''])
    elem_displs = np.empty(0, np.int32)
    char_displs = np.empty(0, np.int32)

    if my_rank == MPI_ROOT:
        gathered = pre_alloc_string_array(total_count, total_char_count)
        elem_displs = sdc.hiframes.join.calc_disp(counts_per_rank)
        char_displs = sdc.hiframes.join.calc_disp(chars_per_rank)

    gathered_offsets_ptr = get_offset_ptr(gathered)
    gathered_chars_ptr = get_data_ptr(gathered)

    # Lengths first (transferred with the int32 type enum) ...
    c_gatherv(
        local_lens.ctypes,
        np.int32(local_count),
        gathered_offsets_ptr,
        counts_per_rank.ctypes,
        elem_displs.ctypes,
        int32_typ_enum)
    # ... then the character payload.
    c_gatherv(
        local_chars_ptr,
        np.int32(local_char_count),
        gathered_chars_ptr,
        chars_per_rank.ctypes,
        char_displs.ctypes,
        char_typ_enum)

    # The offset buffer currently holds the gathered lengths.
    convert_len_arr_to_offset(gathered_offsets_ptr, total_count)
    return gathered
def a2av_str_impl(arr, metadata):
    """Run the all-to-all exchange that fills ``metadata.out_arr``.

    The string lengths in ``metadata.send_arr_lens`` are exchanged into the
    offset buffer of ``metadata.out_arr``, the characters in
    ``metadata.send_arr_chars`` are exchanged into its data buffer, and
    ``convert_len_arr_to_offset`` is finally applied to the offset buffer
    for ``metadata.n_out`` entries.

    ``arr`` is not read by this implementation.
    """
    # TODO: increase refcount?
    out_offsets_ptr = get_offset_ptr(metadata.out_arr)

    # Exchange the per-string lengths.
    sdc.distributed_api.c_alltoallv(
        metadata.send_arr_lens.ctypes,
        out_offsets_ptr,
        metadata.send_counts.ctypes,
        metadata.recv_counts.ctypes,
        metadata.send_disp.ctypes,
        metadata.recv_disp.ctypes,
        int32_typ_enum)

    # Exchange the character payload.
    send_chars = metadata.send_arr_chars
    out_chars_ptr = get_data_ptr(metadata.out_arr)
    sdc.distributed_api.c_alltoallv(
        send_chars,
        out_chars_ptr,
        metadata.send_counts_char.ctypes,
        metadata.recv_counts_char.ctypes,
        metadata.send_disp_char.ctypes,
        metadata.recv_disp_char.ctypes,
        char_typ_enum)

    # The offset buffer currently holds the received lengths.
    convert_len_arr_to_offset(out_offsets_ptr, metadata.n_out)
def codegen(context, builder, sig, args):
    """Emit the IR that builds a split view over a string array.

    The external routine ``str_arr_split_view_impl`` is called to fill a
    newly constructed split-view payload. The payload's index and data
    offsets are then packed, together with the input array's item count and
    data pointer, into a ``string_array_split_view_type`` value.

    The separator comes from ``sep_typ.literal_value`` in the enclosing
    scope and is passed to the routine as an int8 constant.
    """
    in_arr_val, _ = args
    meminfo, payload_ptr = construct_str_arr_split_view(context, builder)
    in_arr = context.make_helper(builder, string_array_type, in_arr_val)

    # (str_arr_split_view_payload* out_view, int64_t n_strs,
    #  uint32_t* offsets, char* data, char sep)
    split_arg_types = [
        payload_ptr.type,
        lir.IntType(64),
        lir.IntType(32).as_pointer(),
        lir.IntType(8).as_pointer(),
        lir.IntType(8),
    ]
    split_fn_type = lir.FunctionType(lir.VoidType(), split_arg_types)
    split_fn = builder.module.get_or_insert_function(
        split_fn_type, name="str_arr_split_view_impl")

    separator = context.get_constant(
        types.int8, ord(sep_typ.literal_value))
    split_args = [
        payload_ptr,
        in_arr.num_items,
        in_arr.offsets,
        in_arr.data,
        separator,
    ]
    builder.call(split_fn, split_args)

    # Read back the payload that the external routine has just filled.
    payload_proxy_cls = cgutils.create_struct_proxy(
        str_arr_split_view_payload_type)
    payload = payload_proxy_cls(
        context, builder, value=builder.load(payload_ptr))

    view = context.make_helper(builder, string_array_split_view_type)
    view.num_items = in_arr.num_items
    view.index_offsets = payload.index_offsets
    view.data_offsets = payload.data_offsets
    # TODO: incref?
    view.data = context.compile_internal(
        builder,
        lambda S: get_data_ptr(S),
        data_ctypes_type(string_array_type),
        [in_arr_val])
    # NOTE: null_bitmap is not copied from the payload here.
    view.meminfo = meminfo

    view_val = view._getvalue()
    # NOTE: no decref is issued on the result before it is returned.
    return impl_ret_new_ref(
        context, builder, string_array_split_view_type, view_val)
def bcast_str_impl(data):
    """Broadcast the contents of a string array from MPI_ROOT into ``data``.

    The root sends the length of each of its strings followed by its
    character buffer. Every other rank receives the lengths directly into
    the offset buffer of ``data``, receives the characters into its data
    buffer, and then applies ``convert_len_arr_to_offset`` to the offset
    buffer.

    The element count and character count are taken from the local ``data``
    on every rank, and both are asserted to be below ``INT_MAX``.
    """
    is_root = sdc.distributed_api.get_rank() == MPI_ROOT
    num_strings = len(data)
    num_chars = num_total_chars(data)
    assert num_strings < INT_MAX
    assert num_chars < INT_MAX

    offsets_ptr = get_offset_ptr(data)
    chars_ptr = get_data_ptr(data)

    if is_root:
        # Root sends string lengths from a temporary array.
        str_lens = np.empty(num_strings, np.uint32)  # XXX offset type is uint32
        for idx in range(num_strings):
            str_lens[idx] = len(data[idx])
        c_bcast(str_lens.ctypes, np.int32(num_strings), int32_typ_enum)
    else:
        # Receivers store the incoming lengths in their offset buffer.
        c_bcast(offsets_ptr, np.int32(num_strings), int32_typ_enum)

    c_bcast(chars_ptr, np.int32(num_chars), char_typ_enum)

    if not is_root:
        # The offset buffer currently holds the received lengths.
        convert_len_arr_to_offset(offsets_ptr, num_strings)