def local_merge_new(left_key, right_key, data_left, data_right):
    """Inner merge join of two individually sorted key arrays.

    left_key/right_key are sorted arrays; data_left/data_right are tuples of
    parallel data arrays. Returns (out_left_key, out_right_key, out_data_left,
    out_data_right) where both key outputs hold the matched key values.

    Fix: the previous implementation walked the left duplicate run and the
    right duplicate run independently, producing m + n - 1 rows for a key
    duplicated m times on the left and n times on the right. Inner-join
    semantics require the full m * n cartesian product, which is what the
    tuple-key variant of this function already does; the nested-run loop
    below mirrors that implementation.
    """
    # TODO: approximate output size properly
    curr_size = 101 + min(len(left_key), len(right_key)) // 10
    out_left_key = empty_like_type(curr_size, left_key)
    out_data_left = alloc_arr_tup(curr_size, data_left)
    out_data_right = alloc_arr_tup(curr_size, data_right)
    out_ind = 0
    left_ind = 0
    right_ind = 0
    while left_ind < len(left_key) and right_ind < len(right_key):
        if left_key[left_ind] == right_key[right_ind]:
            key = left_key[left_ind]
            # cartesian product in case of duplicate keys on either side
            left_run = left_ind
            while left_run < len(left_key) and left_key[left_run] == key:
                right_run = right_ind
                while right_run < len(right_key) and right_key[right_run] == key:
                    # copy_elem_buff* may reallocate, so rebind the buffers
                    out_left_key = copy_elem_buff(out_left_key, out_ind, key)
                    l_data_val = getitem_arr_tup(data_left, left_run)
                    out_data_left = copy_elem_buff_tup(
                        out_data_left, out_ind, l_data_val)
                    r_data_val = getitem_arr_tup(data_right, right_run)
                    out_data_right = copy_elem_buff_tup(
                        out_data_right, out_ind, r_data_val)
                    out_ind += 1
                    right_run += 1
                left_run += 1
            # both runs fully consumed; jump past them
            left_ind = left_run
            right_ind = right_run
        elif left_key[left_ind] < right_key[right_ind]:
            left_ind += 1
        else:
            right_ind += 1
    # shrink over-allocated buffers to the actual output length
    out_left_key = trim_arr(out_left_key, out_ind)
    out_right_key = out_left_key.copy()
    out_data_left = trim_arr_tup(out_data_left, out_ind)
    out_data_right = trim_arr_tup(out_data_right, out_ind)
    return out_left_key, out_right_key, out_data_left, out_data_right
def local_merge_asof(left_keys, right_keys, data_left, data_right):
    """Sorted-merge kernel for an as-of join (pandas ``merge_asof`` style).

    For each left row, pair it with the last right row whose key is <= the
    left key; left rows with no such right row get NaN-filled right columns.
    Inputs are tuples of parallel, sorted arrays; output has exactly one row
    per left row.
    """
    # adapted from pandas/_libs/join_func_helper.pxi
    l_size = len(left_keys[0])
    r_size = len(right_keys[0])
    # output is left-sized: every left row produces exactly one output row
    out_left_keys = alloc_arr_tup(l_size, left_keys)
    out_right_keys = alloc_arr_tup(l_size, right_keys)
    out_data_left = alloc_arr_tup(l_size, data_left)
    out_data_right = alloc_arr_tup(l_size, data_right)
    left_ind = 0
    right_ind = 0
    for left_ind in range(l_size):
        # restart right_ind if it went negative in a previous iteration
        # (i.e. no right key matched the previous left row)
        if right_ind < 0:
            right_ind = 0
        # advance past every right key that is <= the current left key;
        # right_ind - 1 is then the last qualifying right position
        while right_ind < r_size and getitem_arr_tup(
                right_keys, right_ind) <= getitem_arr_tup(left_keys, left_ind):
            right_ind += 1
        right_ind -= 1  # last position with key <= left key, or -1 if none
        setitem_arr_tup(out_left_keys, left_ind,
                        getitem_arr_tup(left_keys, left_ind))
        # TODO: copy_tup
        setitem_arr_tup(out_data_left, left_ind,
                        getitem_arr_tup(data_left, left_ind))
        if right_ind >= 0:
            setitem_arr_tup(out_right_keys, left_ind,
                            getitem_arr_tup(right_keys, right_ind))
            setitem_arr_tup(out_data_right, left_ind,
                            getitem_arr_tup(data_right, right_ind))
        else:
            # no right key <= this left key: NaN-fill the right side
            setitem_arr_tup_nan(out_right_keys, left_ind)
            setitem_arr_tup_nan(out_data_right, left_ind)
    return out_left_keys, out_right_keys, out_data_left, out_data_right
def init_sort_start(key_arrs, data):
    """Build the initial state for a timsort-style merge sort.

    Returns (stackSize, runBase, runLen, tmpLength, tmp, tmp_data, minGallop),
    mirroring the fields of the Java TimSort constructor.
    """
    # Gallop-mode entry threshold. Starts at MIN_GALLOP; the merge routines
    # nudge it higher for random data and lower for highly structured data.
    min_gallop = MIN_GALLOP
    n = len(key_arrs[0])
    # Temp merge storage: half the array for small inputs, otherwise the
    # fixed initial size. May be grown later (see ensureCapacity).
    if n < 2 * INITIAL_TMP_STORAGE_LENGTH:
        tmp_len = n >> 1
    else:
        tmp_len = INITIAL_TMP_STORAGE_LENGTH
    tmp = alloc_arr_tup(tmp_len, key_arrs)
    tmp_data = alloc_arr_tup(tmp_len, data)
    # Stack of pending runs yet to be merged: run i starts at runBase[i]
    # and spans runLen[i] elements, and (while indices are in bounds)
    # runBase[i] + runLen[i] == runBase[i + 1]. The stack cannot be expanded;
    # the length bounds below come from listsort.txt (the C version always
    # uses 85, but smaller stacks are cheaper for mid-sized arrays). These
    # magic numbers must be revisited if MIN_MERGE ever changes.
    stack_size = 0  # number of pending runs currently on the stack
    if n < 120:
        stack_len = 5
    elif n < 1542:
        stack_len = 10
    elif n < 119151:
        stack_len = 19
    else:
        stack_len = 40
    run_base = np.empty(stack_len, np.int64)
    run_len = np.empty(stack_len, np.int64)
    return stack_size, run_base, run_len, tmp_len, tmp, tmp_data, min_gallop
def ensureCapacity(self, minCapacity):
    """Ensure self.tmp / self.tmp_data hold at least minCapacity elements.

    When growth is needed, the new capacity is the smallest power of two
    greater than minCapacity, capped at half the input length
    (self.aLength >> 1), as in the Java TimSort original. Returns self.tmp.
    """
    if self.tmpLength >= minCapacity:
        # already large enough — nothing to do
        return self.tmp
    # Bit-smear minCapacity up to the next power of two (32-bit style).
    new_len = minCapacity
    new_len |= new_len >> 1
    new_len |= new_len >> 2
    new_len |= new_len >> 4
    new_len |= new_len >> 8
    new_len |= new_len >> 16
    new_len += 1
    if new_len < 0:
        # Overflow guard carried over from the Java original; Python ints
        # do not overflow, so this branch is effectively dead here.
        new_len = minCapacity
    else:
        new_len = min(new_len, self.aLength >> 1)
    self.tmp = alloc_arr_tup(new_len, self.key_arrs)
    self.tmp_data = alloc_arr_tup(new_len, self.data)
    self.tmpLength = new_len
    return self.tmp
def ensureCapacity(tmpLength, tmp, tmp_data, key_arrs, data, minCapacity):
    """Grow the merge temp buffers to hold at least minCapacity elements.

    Functional counterpart of the method version: returns the (possibly
    updated) (tmpLength, tmp, tmp_data) triple. The grown size is the
    smallest power of two greater than minCapacity, capped at half the
    input length, as in the Java TimSort original.
    """
    a_len = len(key_arrs[0])
    if tmpLength >= minCapacity:
        # current buffers are already large enough
        return tmpLength, tmp, tmp_data
    # Bit-smear minCapacity up to the next power of two (32-bit style).
    new_len = minCapacity
    new_len |= new_len >> 1
    new_len |= new_len >> 2
    new_len |= new_len >> 4
    new_len |= new_len >> 8
    new_len |= new_len >> 16
    new_len += 1
    if new_len < 0:
        # Overflow guard from the Java original; dead for Python ints.
        new_len = minCapacity
    else:
        # never allocate more than half the input array
        new_len = min(new_len, a_len >> 1)
    tmp = alloc_arr_tup(new_len, key_arrs)
    tmp_data = alloc_arr_tup(new_len, data)
    return new_len, tmp, tmp_data
def parallel_asof_comm(left_key_arrs, right_key_arrs, right_data):
    """Redistribute right-table rows so each rank can do a local as-of join.

    Every rank allgathers the [first, last] key interval of its local left
    chunk, then sends each rank the slice of its local right data that
    overlaps that rank's left interval (via alltoallv). Returns the received
    right keys (as a 1-tuple) and right data.

    NOTE(review): assumes keys are sorted and distributed in order across
    ranks, and that all chunks are non-empty ([0]/[-1] indexing) — confirm
    against callers.
    """
    # align the left and right intervals
    # allgather the boundaries of all left intervals and calculate overlap
    # rank = hpat.distributed_api.get_rank()
    n_pes = hpat.distributed_api.get_size()
    # TODO: multiple keys
    bnd_starts = np.empty(n_pes, left_key_arrs[0].dtype)
    bnd_ends = np.empty(n_pes, left_key_arrs[0].dtype)
    hpat.distributed_api.allgather(bnd_starts, left_key_arrs[0][0])
    hpat.distributed_api.allgather(bnd_ends, left_key_arrs[0][-1])
    send_counts = np.zeros(n_pes, np.int32)
    send_disp = np.zeros(n_pes, np.int32)
    recv_counts = np.zeros(n_pes, np.int32)
    # this rank's local right-key interval
    my_start = right_key_arrs[0][0]
    my_end = right_key_arrs[0][-1]
    offset = -1
    i = 0
    # ignore no overlap processors (end of their interval is before current)
    while i < n_pes - 1 and bnd_ends[i] < my_start:
        i += 1
    # for each overlapping rank, compute the slice of local right data to send
    while i < n_pes and bnd_starts[i] <= my_end:
        offset, count = _count_overlap(
            right_key_arrs[0], bnd_starts[i], bnd_ends[i])
        # one extra element in case first value is needed for start of boundary
        if offset != 0:
            offset -= 1
            count += 1
        send_counts[i] = count
        send_disp[i] = offset
        i += 1
    # one extra element in case last value is need for start of boundary
    # TODO: see if next processor provides the value
    while i < n_pes:
        send_counts[i] = 1
        send_disp[i] = len(right_key_arrs[0]) - 1
        i += 1
    # exchange counts so each rank knows how much it will receive
    hpat.distributed_api.alltoall(send_counts, recv_counts, 1)
    n_total_recv = recv_counts.sum()
    out_r_keys = np.empty(n_total_recv, right_key_arrs[0].dtype)
    # TODO: support string
    out_r_data = alloc_arr_tup(n_total_recv, right_data)
    recv_disp = hpat.hiframes.join.calc_disp(recv_counts)
    hpat.distributed_api.alltoallv(right_key_arrs[0], out_r_keys, send_counts,
                                   recv_counts, send_disp, recv_disp)
    hpat.distributed_api.alltoallv_tup(right_data, out_r_data, send_counts,
                                       recv_counts, send_disp, recv_disp)
    return (out_r_keys, ), out_r_data
def local_merge_new(left_keys, right_keys, data_left, data_right,
                    is_left=False, is_outer=False):
    """Sorted-merge join of two tables with tuple keys.

    Inputs are tuples of parallel sorted key arrays plus tuples of data
    arrays. is_left keeps unmatched left rows (left join); is_outer keeps
    unmatched right rows; both together give a full outer join. Unmatched
    rows get NaN-filled columns on the missing side. Returns
    (out_left_key, out_right_key, out_data_left, out_data_right); the two
    key outputs are copies of the same matched/merged key values.
    """
    l_len = len(left_keys[0])
    r_len = len(right_keys[0])
    # TODO: approximate output size properly
    curr_size = 101 + min(l_len, r_len) // 2
    if is_left:
        curr_size = int(1.1 * l_len)
    if is_outer:
        curr_size = int(1.1 * r_len)
    if is_left and is_outer:
        curr_size = int(1.1 * (l_len + r_len))
    out_left_key = alloc_arr_tup(curr_size, left_keys)
    out_data_left = alloc_arr_tup(curr_size, data_left)
    out_data_right = alloc_arr_tup(curr_size, data_right)
    out_ind = 0
    left_ind = 0
    right_ind = 0
    while left_ind < len(left_keys[0]) and right_ind < len(right_keys[0]):
        if getitem_arr_tup(left_keys, left_ind) == getitem_arr_tup(
                right_keys, right_ind):
            key = getitem_arr_tup(left_keys, left_ind)
            # cartesian product in case of duplicate keys on either side
            left_run = left_ind
            while left_run < len(left_keys[0]) and getitem_arr_tup(
                    left_keys, left_run) == key:
                right_run = right_ind
                while right_run < len(right_keys[0]) and getitem_arr_tup(
                        right_keys, right_run) == key:
                    # copy_elem_buff_tup may reallocate; rebind the buffers
                    out_left_key = copy_elem_buff_tup(out_left_key, out_ind, key)
                    l_data_val = getitem_arr_tup(data_left, left_run)
                    out_data_left = copy_elem_buff_tup(
                        out_data_left, out_ind, l_data_val)
                    r_data_val = getitem_arr_tup(data_right, right_run)
                    out_data_right = copy_elem_buff_tup(
                        out_data_right, out_ind, r_data_val)
                    out_ind += 1
                    right_run += 1
                left_run += 1
            # skip past the duplicate runs on both sides
            left_ind = left_run
            right_ind = right_run
        elif getitem_arr_tup(left_keys, left_ind) < getitem_arr_tup(
                right_keys, right_ind):
            # left key has no match on the right
            if is_left:
                out_left_key = copy_elem_buff_tup(
                    out_left_key, out_ind, getitem_arr_tup(left_keys, left_ind))
                l_data_val = getitem_arr_tup(data_left, left_ind)
                out_data_left = copy_elem_buff_tup(
                    out_data_left, out_ind, l_data_val)
                out_data_right = setnan_elem_buff_tup(out_data_right, out_ind)
                out_ind += 1
            left_ind += 1
        else:
            # right key has no match on the left
            if is_outer:
                # TODO: support separate keys?
                out_left_key = copy_elem_buff_tup(
                    out_left_key, out_ind,
                    getitem_arr_tup(right_keys, right_ind))
                out_data_left = setnan_elem_buff_tup(out_data_left, out_ind)
                r_data_val = getitem_arr_tup(data_right, right_ind)
                out_data_right = copy_elem_buff_tup(
                    out_data_right, out_ind, r_data_val)
                out_ind += 1
            right_ind += 1
    # left join: emit remaining unmatched left rows with NaN right columns
    if is_left and left_ind < len(left_keys[0]):
        while left_ind < len(left_keys[0]):
            out_left_key = copy_elem_buff_tup(
                out_left_key, out_ind, getitem_arr_tup(left_keys, left_ind))
            l_data_val = getitem_arr_tup(data_left, left_ind)
            out_data_left = copy_elem_buff_tup(
                out_data_left, out_ind, l_data_val)
            out_data_right = setnan_elem_buff_tup(out_data_right, out_ind)
            out_ind += 1
            left_ind += 1
    # outer join: emit remaining unmatched right rows with NaN left columns
    if is_outer and right_ind < len(right_keys[0]):
        while right_ind < len(right_keys[0]):
            out_left_key = copy_elem_buff_tup(
                out_left_key, out_ind, getitem_arr_tup(right_keys, right_ind))
            out_data_left = setnan_elem_buff_tup(out_data_left, out_ind)
            r_data_val = getitem_arr_tup(data_right, right_ind)
            out_data_right = copy_elem_buff_tup(
                out_data_right, out_ind, r_data_val)
            out_ind += 1
            right_ind += 1
    #out_left_key = out_left_key[:out_ind]
    # shrink the over-allocated buffers to the actual output length
    out_left_key = trim_arr_tup(out_left_key, out_ind)
    out_right_key = copy_arr_tup(out_left_key)
    out_data_left = trim_arr_tup(out_data_left, out_ind)
    out_data_right = trim_arr_tup(out_data_right, out_ind)
    return out_left_key, out_right_key, out_data_left, out_data_right
def local_hash_join_impl(left_keys, right_keys, data_left, data_right,
                         is_left=False, is_right=False):
    """Hash join of two local tables with tuple keys.

    Builds a multimap from the right keys, then probes it with each left row.
    is_left keeps unmatched left rows (NaN right columns); is_right keeps
    unmatched right rows (NaN left columns). Returns
    (out_left_key, out_right_key, out_data_left, out_data_right); the two key
    outputs are copies of the same values.
    """
    l_len = len(left_keys[0])
    r_len = len(right_keys[0])
    # TODO: approximate output size properly
    curr_size = 101 + min(l_len, r_len) // 2
    if is_left:
        curr_size = int(1.1 * l_len)
    if is_right:
        curr_size = int(1.1 * r_len)
    if is_left and is_right:
        curr_size = int(1.1 * (l_len + r_len))
    out_left_key = alloc_arr_tup(curr_size, left_keys)
    out_data_left = alloc_arr_tup(curr_size, data_left)
    out_data_right = alloc_arr_tup(curr_size, data_right)
    # keep track of matched keys in case of right join
    if is_right:
        r_matched = np.full(r_len, False, np.bool_)
    out_ind = 0
    # build phase: map each right key (or its hash) to its row index
    m = hpat.dict_ext.multimap_int64_init()
    for i in range(r_len):
        # store hash if keys are tuple or non-int
        k = _hash_if_tup(getitem_arr_tup(right_keys, i))
        hpat.dict_ext.multimap_int64_insert(m, k, i)
    # reusable equal_range cursor; deallocated after the probe loop
    r = hpat.dict_ext.multimap_int64_equal_range_alloc()
    # probe phase: look up every left row in the multimap
    for i in range(l_len):
        l_key = getitem_arr_tup(left_keys, i)
        l_data_val = getitem_arr_tup(data_left, i)
        k = _hash_if_tup(l_key)
        hpat.dict_ext.multimap_int64_equal_range_inplace(m, k, r)
        num_matched = 0
        for j in r:
            # if hash was stored, check left key against the actual right key
            # (guards against hash collisions); -1 means no real match
            r_ind = _check_ind_if_hashed(right_keys, j, l_key)
            if r_ind == -1:
                continue
            if is_right:
                r_matched[r_ind] = True
            # copy_elem_buff_tup may reallocate; rebind the buffers
            out_left_key = copy_elem_buff_tup(out_left_key, out_ind, l_key)
            r_data_val = getitem_arr_tup(data_right, r_ind)
            out_data_right = copy_elem_buff_tup(
                out_data_right, out_ind, r_data_val)
            out_data_left = copy_elem_buff_tup(
                out_data_left, out_ind, l_data_val)
            out_ind += 1
            num_matched += 1
        # left join: emit the unmatched left row with NaN right columns
        if is_left and num_matched == 0:
            out_left_key = copy_elem_buff_tup(out_left_key, out_ind, l_key)
            out_data_left = copy_elem_buff_tup(
                out_data_left, out_ind, l_data_val)
            out_data_right = setnan_elem_buff_tup(out_data_right, out_ind)
            out_ind += 1
    hpat.dict_ext.multimap_int64_equal_range_dealloc(r)
    # produce NA rows for unmatched right keys
    if is_right:
        for i in range(r_len):
            if not r_matched[i]:
                r_key = getitem_arr_tup(right_keys, i)
                r_data_val = getitem_arr_tup(data_right, i)
                out_left_key = copy_elem_buff_tup(out_left_key, out_ind, r_key)
                out_data_right = copy_elem_buff_tup(
                    out_data_right, out_ind, r_data_val)
                out_data_left = setnan_elem_buff_tup(out_data_left, out_ind)
                out_ind += 1
    # shrink the over-allocated buffers to the actual output length
    out_left_key = trim_arr_tup(out_left_key, out_ind)
    out_right_key = copy_arr_tup(out_left_key)
    out_data_left = trim_arr_tup(out_data_left, out_ind)
    out_data_right = trim_arr_tup(out_data_right, out_ind)
    return out_left_key, out_right_key, out_data_left, out_data_right