def parallel_join_impl(key_arrs, data):
    """Redistribute rows across ranks by hashing each row's key.

    A row's destination rank is ``hash(key) % n``, where ``n`` is the value
    returned by ``sdc.distributed_api.get_size()``.  Rows are exchanged with
    ``alltoallv_tup`` after the shuffle metadata has been built and the send
    buffers have been filled.

    Args:
        key_arrs: key arrays; the row count is taken from ``key_arrs[0]``.
        data: data arrays, appended to ``key_arrs`` with ``+`` for the
            exchange.

    Returns:
        ``(out_keys, out_data)`` as extracted from the received buffers by
        ``_get_keys_tup`` and ``_get_data_tup``.
    """
    num_ranks = sdc.distributed_api.get_size()
    num_rows = len(key_arrs[0])

    # Pass 1: register every row with its destination rank in the
    # pre-shuffle metadata, then finalize it.
    pre_meta = alloc_pre_shuffle_metadata(key_arrs, data, num_ranks, False)
    for row in range(num_rows):
        key = getitem_arr_tup_single(key_arrs, row)
        dest = hash(key) % num_ranks
        key_tup = val_to_tup(key)
        row_data = getitem_arr_tup(data, row)
        update_shuffle_meta(pre_meta, dest, row, key_tup, row_data, False)
    meta = finalize_shuffle_meta(key_arrs, data, pre_meta, num_ranks, False)

    # Pass 2: recompute each destination and write the row into the send
    # buffers.
    for row in range(num_rows):
        key = getitem_arr_tup_single(key_arrs, row)
        dest = hash(key) % num_ranks
        write_send_buff(meta, dest, row, val_to_tup(key), data)
        # The offset is bumped only after the write because the write for
        # the data columns reuses it.
        meta.tmp_offset[dest] += 1

    # Exchange keys and data together, then split the received buffers.
    received = alltoallv_tup(key_arrs + data, meta)
    return _get_keys_tup(received, key_arrs), _get_data_tup(received, key_arrs)
def parallel_sort(key_arrs, data, ascending=True):
    """Range-partition rows across ranks using sampled splitter keys.

    Steps, as performed below:

    1. Each rank draws random samples from ``key_arrs[0]``; the sample count
       follows the fraction ``sampleSize / n_total`` (similar to Spark's
       sample computation in Partitioner.scala).
    2. Samples are gathered with ``gatherv``; the root rank sorts them
       (reversed when ``ascending`` is false) and picks ``n_pes - 1``
       splitter values, which are then broadcast.
    3. Every local row is assigned a destination rank by comparing its first
       key against the splitters, and rows are exchanged with
       ``alltoallv_tup``.

    Only ``key_arrs[0]`` is used for sampling and for choosing the
    destination rank.

    NOTE(review): the destination rank only ever increases while scanning
    the local rows, so this presumably expects ``key_arrs[0]`` to already be
    sorted locally in the requested order -- confirm against the caller.

    Args:
        key_arrs: key arrays; ``key_arrs[0]`` drives the partitioning.
        data: data arrays, appended to ``key_arrs`` with ``+`` for the
            exchange.
        ascending: direction of the sort order used for the splitters.

    Returns:
        ``(out_key, out_data)`` as extracted from the received buffers by
        ``_get_keys_tup`` and ``_get_data_tup``.
    """
    n_local = len(key_arrs[0])
    n_total = sdc.distributed_api.dist_reduce(
        n_local, np.int32(Reduce_Type.Sum.value))

    n_pes = sdc.distributed_api.get_size()
    my_rank = sdc.distributed_api.get_rank()

    # similar to Spark's sample computation Partitioner.scala
    sample_size = min(samplePointsPerPartitionHint * n_pes, MIN_SAMPLES)

    # max(..., 1) avoids dividing by zero when there is no data at all.
    fraction = min(sample_size / max(n_total, 1), 1.0)
    n_loc_samples = min(math.ceil(fraction * n_local), n_local)
    inds = np.random.randint(0, n_local, n_loc_samples)
    samples = key_arrs[0][inds]

    all_samples = sdc.distributed_api.gatherv(samples)
    all_samples = to_string_list(all_samples)
    bounds = empty_like_type(n_pes - 1, all_samples)

    # Only the root rank computes the splitters; they are broadcast below.
    if my_rank == MPI_ROOT:
        all_samples.sort()
        if not ascending:
            all_samples = all_samples[::-1]
        n_samples = len(all_samples)
        step = math.ceil(n_samples / n_pes)
        # NOTE(review): when n_samples == 0 the index below evaluates to -1
        # on an empty list -- confirm callers never sort globally empty data.
        for i in range(n_pes - 1):
            bounds[i] = all_samples[min((i + 1) * step, n_samples - 1)]

    bounds = str_list_to_array(bounds)
    bounds = sdc.distributed_api.prealloc_str_for_bcast(bounds)
    sdc.distributed_api.bcast(bounds)

    # calc send/recv counts
    pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data, n_pes, True)
    node_id = 0
    for i in range(n_local):
        val = key_arrs[0][i]
        # Advance past *every* splitter this key has crossed.  A single
        # ``if`` would move forward by at most one rank per row, so a key
        # that jumps over several splitters at once (e.g. bounds [5, 10] and
        # local keys [1, 20]) would be sent to the wrong rank.
        while node_id < (n_pes - 1) and (
                (ascending and val >= bounds[node_id])
                or ((not ascending) and val <= bounds[node_id])):
            node_id += 1
        update_shuffle_meta(pre_shuffle_meta, node_id, i, (val,),
                            getitem_arr_tup(data, i), True)

    shuffle_meta = finalize_shuffle_meta(
        key_arrs, data, pre_shuffle_meta, n_pes, True)

    # shuffle
    recvs = alltoallv_tup(key_arrs + data, shuffle_meta)
    out_key = _get_keys_tup(recvs, key_arrs)
    out_data = _get_data_tup(recvs, key_arrs)

    return out_key, out_data