def parallel_join_impl(key_arrs, data):
    # alloc shuffle meta
    n_pes = sdc.distributed_api.get_size()
    pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data, n_pes, False)

    # calc send/recv counts
    for i in range(len(key_arrs[0])):
        val = getitem_arr_tup_single(key_arrs, i)
        node_id = hash(val) % n_pes
        update_shuffle_meta(pre_shuffle_meta, node_id, i, val_to_tup(val),
                            getitem_arr_tup(data, i), False)

    shuffle_meta = finalize_shuffle_meta(key_arrs, data, pre_shuffle_meta,
                                         n_pes, False)

    # write send buffers
    for i in range(len(key_arrs[0])):
        val = getitem_arr_tup_single(key_arrs, i)
        node_id = hash(val) % n_pes
        write_send_buff(shuffle_meta, node_id, i, val_to_tup(val), data)
        # update last since it is reused in data
        shuffle_meta.tmp_offset[node_id] += 1

    # shuffle
    recvs = alltoallv_tup(key_arrs + data, shuffle_meta)
    out_keys = _get_keys_tup(recvs, key_arrs)
    out_data = _get_data_tup(recvs, key_arrs)
    return out_keys, out_data
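
# parallel_join_impl above is a hash shuffle: each row is routed to the rank
# that owns hash(key) % n_pes, so equal keys from both join sides meet on the
# same rank before the local join runs. A minimal pure-Python sketch of the
# same routing idea (no MPI; plain lists stand in for the SDC send buffers
# and the alltoallv exchange, and all names below are illustrative, not SDC
# APIs):

def _hash_partition_sketch(keys, rows, n_pes):
    """Group (key, row) pairs into per-rank send lists by hash(key) % n_pes."""
    send = [[] for _ in range(n_pes)]
    for k, r in zip(keys, rows):
        send[hash(k) % n_pes].append((k, r))
    return send

# e.g. _hash_partition_sketch(['a', 'b', 'a'], [1, 2, 3], 2) always puts both
# 'a' rows in the same bucket (whatever hash('a') is for this run), which is
# exactly how the two join sides get aligned rank by rank.
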
def parallel_sort(key_arrs, data, ascending=True):
    n_local = len(key_arrs[0])
    n_total = sdc.distributed_api.dist_reduce(
        n_local, np.int32(Reduce_Type.Sum.value))
    n_pes = sdc.distributed_api.get_size()
    my_rank = sdc.distributed_api.get_rank()

    # similar to Spark's sample computation, Partitioner.scala
    sampleSize = min(samplePointsPerPartitionHint * n_pes, MIN_SAMPLES)
    fraction = min(sampleSize / max(n_total, 1), 1.0)
    n_loc_samples = min(math.ceil(fraction * n_local), n_local)
    inds = np.random.randint(0, n_local, n_loc_samples)
    samples = key_arrs[0][inds]

    all_samples = sdc.distributed_api.gatherv(samples)
    all_samples = to_string_list(all_samples)
    bounds = empty_like_type(n_pes - 1, all_samples)

    if my_rank == MPI_ROOT:
        all_samples.sort()
        if not ascending:
            all_samples = all_samples[::-1]
        n_samples = len(all_samples)
        step = math.ceil(n_samples / n_pes)
        for i in range(n_pes - 1):
            bounds[i] = all_samples[min((i + 1) * step, n_samples - 1)]

    bounds = str_list_to_array(bounds)
    bounds = sdc.distributed_api.prealloc_str_for_bcast(bounds)
    sdc.distributed_api.bcast(bounds)

    # calc send/recv counts
    pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data, n_pes, True)
    node_id = 0
    for i in range(n_local):
        val = key_arrs[0][i]
        # assumes locally sorted keys; advance past every bound this key
        # passes (a plain `if` here could lag behind when a key skips
        # several bounds at once)  TODO: refactor
        while node_id < (n_pes - 1) and (
                ascending and val >= bounds[node_id]
                or (not ascending) and val <= bounds[node_id]):
            node_id += 1
        update_shuffle_meta(pre_shuffle_meta, node_id, i, (val,),
                            getitem_arr_tup(data, i), True)

    shuffle_meta = finalize_shuffle_meta(key_arrs, data, pre_shuffle_meta,
                                         n_pes, True)

    # shuffle
    recvs = alltoallv_tup(key_arrs + data, shuffle_meta)
    out_key = _get_keys_tup(recvs, key_arrs)
    out_data = _get_data_tup(recvs, key_arrs)
    return out_key, out_data
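
# parallel_sort picks splitters the way Spark's RangePartitioner does: sample
# locally, gather samples on the root, sort them, and take every
# ceil(n_samples / n_pes)-th element as a bound; rows are then routed to the
# rank whose key range contains them. A minimal single-process sketch of the
# bound computation (the helper name and the bisect usage note below are
# assumptions for illustration, not SDC code):

import math

def _range_bounds_sketch(samples, n_pes):
    """Compute n_pes - 1 splitters from gathered samples, Spark-style."""
    samples = sorted(samples)
    step = math.ceil(len(samples) / n_pes)
    return [samples[min((i + 1) * step, len(samples) - 1)]
            for i in range(n_pes - 1)]

# e.g. _range_bounds_sketch([3, 1, 4, 1, 5, 9, 2, 6], 4) -> [2, 4, 6].
# A key then belongs on rank bisect.bisect_right(bounds, key), which is what
# the node_id-advancing loop above computes incrementally over sorted keys.
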
def local_merge_asof(left_keys, right_keys, data_left, data_right):
    # adapted from pandas/_libs/join_func_helper.pxi
    l_size = len(left_keys[0])
    r_size = len(right_keys[0])

    out_left_keys = alloc_arr_tup(l_size, left_keys)
    out_right_keys = alloc_arr_tup(l_size, right_keys)
    out_data_left = alloc_arr_tup(l_size, data_left)
    out_data_right = alloc_arr_tup(l_size, data_right)

    left_ind = 0
    right_ind = 0
    for left_ind in range(l_size):
        # restart right_ind if it went negative in a previous iteration
        if right_ind < 0:
            right_ind = 0

        # find the last position in right whose value is less than or equal
        # to left's value
        while right_ind < r_size and getitem_arr_tup(
                right_keys, right_ind) <= getitem_arr_tup(left_keys, left_ind):
            right_ind += 1
        right_ind -= 1

        setitem_arr_tup(out_left_keys, left_ind,
                        getitem_arr_tup(left_keys, left_ind))
        # TODO: copy_tup
        setitem_arr_tup(out_data_left, left_ind,
                        getitem_arr_tup(data_left, left_ind))

        if right_ind >= 0:
            setitem_arr_tup(out_right_keys, left_ind,
                            getitem_arr_tup(right_keys, right_ind))
            setitem_arr_tup(out_data_right, left_ind,
                            getitem_arr_tup(data_right, right_ind))
        else:
            setitem_arr_tup_nan(out_right_keys, left_ind)
            setitem_arr_tup_nan(out_data_right, left_ind)

    return out_left_keys, out_right_keys, out_data_left, out_data_right
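
# local_merge_asof mirrors pandas' backward merge_asof: for each left key,
# take the last right row whose key is <= the left key, or a NaN row if none
# exists. A minimal sketch of the two-pointer scan over plain lists
# (illustrative name; assumes both key lists are sorted ascending, which the
# scan above also requires):

def _merge_asof_sketch(left_keys, right_keys):
    """Return, per left key, the matching right index, or -1 for a NaN row."""
    out = []
    right_ind = 0
    for lk in left_keys:
        # advance to the first right position strictly greater than lk
        while right_ind < len(right_keys) and right_keys[right_ind] <= lk:
            right_ind += 1
        out.append(right_ind - 1)  # last right position <= lk; -1 if none
    return out

# _merge_asof_sketch([1, 5, 10], [2, 3, 7]) -> [-1, 1, 2]
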
def getitem_arr_tup_single_overload(arrs, i):
    # compile-time specialization: a single-column key tuple yields the bare
    # scalar value, while multi-column keys yield a tuple of values
    if len(arrs.types) == 1:
        return lambda arrs, i: arrs[0][i]
    return lambda arrs, i: getitem_arr_tup(arrs, i)
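
# The overload selector above runs at Numba compile time: the branch on
# len(arrs.types) inspects the *type* of the array tuple, and the returned
# lambda becomes the compiled implementation. A plain-Python illustration of
# the two result shapes (values are illustrative):

arrs_one = ([10, 20, 30],)                   # one key column
arrs_two = ([10, 20, 30], ['a', 'b', 'c'])   # two key columns

val_single = arrs_one[0][1]                  # 20: a bare scalar
val_multi = tuple(a[1] for a in arrs_two)    # (20, 'b'): a tuple of values

# hash(val_single) generally differs from hash((val_single,)), so returning
# the scalar in the single-key case likely keeps hash(val) in
# parallel_join_impl consistent with plain value hashing.
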
def local_hash_join_impl(left_keys, right_keys, data_left, data_right,
                         is_left=False, is_right=False):
    l_len = len(left_keys[0])
    r_len = len(right_keys[0])

    # TODO: approximate output size properly
    curr_size = 101 + min(l_len, r_len) // 2
    if is_left:
        curr_size = int(1.1 * l_len)
    if is_right:
        curr_size = int(1.1 * r_len)
    if is_left and is_right:
        curr_size = int(1.1 * (l_len + r_len))

    out_left_key = alloc_arr_tup(curr_size, left_keys)
    out_data_left = alloc_arr_tup(curr_size, data_left)
    out_data_right = alloc_arr_tup(curr_size, data_right)

    # keep track of matched keys in case of right join
    if is_right:
        r_matched = np.full(r_len, False, np.bool_)

    out_ind = 0
    m = sdc.dict_ext.multimap_int64_init()
    for i in range(r_len):
        # store the hash if keys are tuples or non-int
        k = _hash_if_tup(getitem_arr_tup(right_keys, i))
        sdc.dict_ext.multimap_int64_insert(m, k, i)

    r = sdc.dict_ext.multimap_int64_equal_range_alloc()
    for i in range(l_len):
        l_key = getitem_arr_tup(left_keys, i)
        l_data_val = getitem_arr_tup(data_left, i)
        k = _hash_if_tup(l_key)
        sdc.dict_ext.multimap_int64_equal_range_inplace(m, k, r)
        num_matched = 0
        for j in r:
            # if hashes were stored, check the left key against the actual
            # right key to rule out hash collisions
            r_ind = _check_ind_if_hashed(right_keys, j, l_key)
            if r_ind == -1:
                continue
            if is_right:
                r_matched[r_ind] = True
            out_left_key = copy_elem_buff_tup(out_left_key, out_ind, l_key)
            r_data_val = getitem_arr_tup(data_right, r_ind)
            out_data_right = copy_elem_buff_tup(out_data_right, out_ind,
                                                r_data_val)
            out_data_left = copy_elem_buff_tup(out_data_left, out_ind,
                                               l_data_val)
            out_ind += 1
            num_matched += 1
        if is_left and num_matched == 0:
            out_left_key = copy_elem_buff_tup(out_left_key, out_ind, l_key)
            out_data_left = copy_elem_buff_tup(out_data_left, out_ind,
                                               l_data_val)
            out_data_right = setnan_elem_buff_tup(out_data_right, out_ind)
            out_ind += 1
    sdc.dict_ext.multimap_int64_equal_range_dealloc(r)

    # produce NA rows for unmatched right keys
    if is_right:
        for i in range(r_len):
            if not r_matched[i]:
                r_key = getitem_arr_tup(right_keys, i)
                r_data_val = getitem_arr_tup(data_right, i)
                out_left_key = copy_elem_buff_tup(out_left_key, out_ind, r_key)
                out_data_right = copy_elem_buff_tup(out_data_right, out_ind,
                                                    r_data_val)
                out_data_left = setnan_elem_buff_tup(out_data_left, out_ind)
                out_ind += 1

    out_left_key = trim_arr_tup(out_left_key, out_ind)
    out_right_key = copy_arr_tup(out_left_key)
    out_data_left = trim_arr_tup(out_data_left, out_ind)
    out_data_right = trim_arr_tup(out_data_right, out_ind)
    return out_left_key, out_right_key, out_data_left, out_data_right
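
# local_hash_join_impl is a classic build/probe hash join: build a multimap
# from right keys to row indices, probe it with each left key, and (for
# is_left / is_right) emit NaN-padded rows for unmatched keys. A minimal
# sketch with a plain dict-of-lists in place of the C++ multimap (names and
# the index-pair output format are illustrative):

from collections import defaultdict

def _hash_join_sketch(left_keys, right_keys, is_left=False):
    """Return (left_index, right_index) pairs; right index -1 means NaN row."""
    build = defaultdict(list)
    for j, k in enumerate(right_keys):   # build phase over the right table
        build[k].append(j)
    out = []
    for i, k in enumerate(left_keys):    # probe phase over the left table
        matches = build.get(k, [])
        for j in matches:
            out.append((i, j))
        if is_left and not matches:
            out.append((i, -1))          # left outer: keep unmatched left row
    return out

# _hash_join_sketch([1, 2, 2], [2, 3], is_left=True)
#   -> [(0, -1), (1, 0), (2, 0)]
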
def local_merge_new(left_keys, right_keys, data_left, data_right,
                    is_left=False, is_outer=False):
    l_len = len(left_keys[0])
    r_len = len(right_keys[0])

    # TODO: approximate output size properly
    curr_size = 101 + min(l_len, r_len) // 2
    if is_left:
        curr_size = int(1.1 * l_len)
    if is_outer:
        curr_size = int(1.1 * r_len)
    if is_left and is_outer:
        curr_size = int(1.1 * (l_len + r_len))

    out_left_key = alloc_arr_tup(curr_size, left_keys)
    out_data_left = alloc_arr_tup(curr_size, data_left)
    out_data_right = alloc_arr_tup(curr_size, data_right)

    out_ind = 0
    left_ind = 0
    right_ind = 0
    while left_ind < l_len and right_ind < r_len:
        if getitem_arr_tup(left_keys, left_ind) == getitem_arr_tup(
                right_keys, right_ind):
            key = getitem_arr_tup(left_keys, left_ind)
            # Cartesian product in case of duplicate keys on either side
            left_run = left_ind
            while left_run < l_len and getitem_arr_tup(
                    left_keys, left_run) == key:
                right_run = right_ind
                while right_run < r_len and getitem_arr_tup(
                        right_keys, right_run) == key:
                    out_left_key = copy_elem_buff_tup(out_left_key, out_ind,
                                                      key)
                    l_data_val = getitem_arr_tup(data_left, left_run)
                    out_data_left = copy_elem_buff_tup(out_data_left, out_ind,
                                                       l_data_val)
                    r_data_val = getitem_arr_tup(data_right, right_run)
                    out_data_right = copy_elem_buff_tup(out_data_right,
                                                        out_ind, r_data_val)
                    out_ind += 1
                    right_run += 1
                left_run += 1
            left_ind = left_run
            right_ind = right_run
        elif getitem_arr_tup(left_keys, left_ind) < getitem_arr_tup(
                right_keys, right_ind):
            if is_left:
                out_left_key = copy_elem_buff_tup(
                    out_left_key, out_ind,
                    getitem_arr_tup(left_keys, left_ind))
                l_data_val = getitem_arr_tup(data_left, left_ind)
                out_data_left = copy_elem_buff_tup(out_data_left, out_ind,
                                                   l_data_val)
                out_data_right = setnan_elem_buff_tup(out_data_right, out_ind)
                out_ind += 1
            left_ind += 1
        else:
            if is_outer:
                # TODO: support separate keys?
                out_left_key = copy_elem_buff_tup(
                    out_left_key, out_ind,
                    getitem_arr_tup(right_keys, right_ind))
                out_data_left = setnan_elem_buff_tup(out_data_left, out_ind)
                r_data_val = getitem_arr_tup(data_right, right_ind)
                out_data_right = copy_elem_buff_tup(out_data_right, out_ind,
                                                    r_data_val)
                out_ind += 1
            right_ind += 1

    # append remaining unmatched rows for left/outer joins
    if is_left:
        while left_ind < l_len:
            out_left_key = copy_elem_buff_tup(
                out_left_key, out_ind, getitem_arr_tup(left_keys, left_ind))
            l_data_val = getitem_arr_tup(data_left, left_ind)
            out_data_left = copy_elem_buff_tup(out_data_left, out_ind,
                                               l_data_val)
            out_data_right = setnan_elem_buff_tup(out_data_right, out_ind)
            out_ind += 1
            left_ind += 1
    if is_outer:
        while right_ind < r_len:
            out_left_key = copy_elem_buff_tup(
                out_left_key, out_ind, getitem_arr_tup(right_keys, right_ind))
            out_data_left = setnan_elem_buff_tup(out_data_left, out_ind)
            r_data_val = getitem_arr_tup(data_right, right_ind)
            out_data_right = copy_elem_buff_tup(out_data_right, out_ind,
                                                r_data_val)
            out_ind += 1
            right_ind += 1

    out_left_key = trim_arr_tup(out_left_key, out_ind)
    out_right_key = copy_arr_tup(out_left_key)
    out_data_left = trim_arr_tup(out_data_left, out_ind)
    out_data_right = trim_arr_tup(out_data_right, out_ind)
    return out_left_key, out_right_key, out_data_left, out_data_right
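
# local_merge_new is the merge step of a sort-merge join: both inputs must be
# sorted on the key, two cursors advance in lockstep, and runs of duplicate
# keys on both sides produce their Cartesian product. A compact sketch of the
# duplicate-run handling (illustrative; inner join only, index pairs as
# output):

def _sorted_merge_sketch(left_keys, right_keys):
    """Return (left_index, right_index) pairs of equal keys in sorted inputs."""
    out, li, ri = [], 0, 0
    while li < len(left_keys) and ri < len(right_keys):
        if left_keys[li] == right_keys[ri]:
            key = left_keys[li]
            lr = li
            while lr < len(left_keys) and left_keys[lr] == key:
                rr = ri
                while rr < len(right_keys) and right_keys[rr] == key:
                    out.append((lr, rr))  # Cartesian product of the two runs
                    rr += 1
                lr += 1
            li, ri = lr, rr  # skip past both runs
        elif left_keys[li] < right_keys[ri]:
            li += 1
        else:
            ri += 1
    return out

# _sorted_merge_sketch([1, 2, 2], [2, 2, 3])
#   -> [(1, 0), (1, 1), (2, 0), (2, 1)]
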
def _impl(right_keys, r_ind, l_key):
    # the multimap stored hashes, so verify the candidate index by comparing
    # the actual right key against the left key (rules out hash collisions)
    r_key = getitem_arr_tup(right_keys, r_ind)
    if r_key != l_key:
        return -1
    return r_ind