def test_cython_right_outer_join(self):
    """Check a right outer join obtained by swapping the operands of
    ``algos.left_outer_join``.

    Expected indexers are written as positions into the stably sorted
    key arrays and then mapped back to original row numbers.
    """
    left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
    right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
    n_groups = 5

    # `right` is passed first, so it is the side whose rows are all kept
    rs, ls = algos.left_outer_join(right, left, n_groups)

    left_order = left.argsort(kind='mergesort')
    right_order = right.argsort(kind='mergesort')

    # positions into the sorted left keys, one triple per sorted right row
    left_positions = a_([
        0, 1, 2,     # right key 0
        3, 4, 5,     # right key 1
        3, 4, 5,     # right key 1
        3, 4, 5,     # right key 1
        6, 7, 8,     # right key 2
        6, 7, 8,     # right key 2
        -1,          # right key 4: no left row carries it
    ])
    right_positions = a_([
        0, 0, 0,
        1, 1, 1,
        2, 2, 2,
        3, 3, 3,
        4, 4, 4,
        5, 5, 5,
        6,
    ])

    expected_ls = left_order.take(left_positions)
    expected_ls[left_positions == -1] = -1

    expected_rs = right_order.take(right_positions)
    expected_rs[right_positions == -1] = -1

    self.assert_numpy_array_equal(ls, expected_ls, check_dtype=False)
    self.assert_numpy_array_equal(rs, expected_rs, check_dtype=False)
def _get_multiindex_indexer(join_keys, index, sort):
    """
    Compute left-outer-join indexers between ``join_keys`` (left side)
    and a multi-level ``index`` (right side).

    Each level of ``index`` is factorized together with the matching
    entry of ``join_keys``, the per-level labels are combined into flat
    keys by ``_get_join_keys``, factorized once more, and handed to
    ``_algos.left_outer_join``, whose result is returned unchanged.

    Parameters
    ----------
    join_keys : sequence of arrays, one per level of ``index``
    index : object exposing ``levels`` and ``labels``
    sort : bool, forwarded to every factorization and to the join
    """
    from functools import partial

    # every factorization below shares the caller's `sort` flag
    factorize = partial(_factorize_keys, sort=sort)

    # one (right labels, left labels, group count) triple per level/key pair
    per_level = [
        factorize(level, key) for level, key in zip(index.levels, join_keys)
    ]
    rlab, llab, shape = map(list, zip(*per_level))

    if sort:
        # route the index's own labels through the factorized level labels
        rlab = [np.take(codes, lab) for codes, lab in zip(rlab, index.labels)]
    else:
        # work on int64 copies; the null fix-up below assigns in place
        rlab = [
            lab.astype('i8', subok=False, copy=True) for lab in index.labels
        ]

    # right labels equal to -1 mark nulls and need a group of their own
    for i, key in enumerate(join_keys):
        null_mask = index.labels[i] == -1
        if not null_mask.any():
            continue

        # a null already present on the left would have been factorized
        # to the last group, `shape[i] - 1`; `x != x` holds only for NaN
        last_group = key[llab[i] == shape[i] - 1]
        left_has_null = last_group.size != 0 and last_group[0] != last_group[0]
        if not left_has_null:
            shape[i] += 1

        rlab[i][null_mask] = shape[i] - 1

    # collapse the per-level labels into flat i8 join keys
    lkey, rkey = _get_join_keys(llab, rlab, shape, sort)

    # re-factorize the flat keys into a dense i8 space
    lkey, rkey, count = factorize(lkey, rkey)

    return _algos.left_outer_join(lkey, rkey, count, sort=sort)
def _get_single_indexer(join_key, index, sort=False):
    """
    Return ``(left_indexer, right_indexer)`` for a left outer join of
    ``join_key`` against ``index``.

    Both inputs go through ``_factorize_keys``; the resulting keys are
    passed through ``com._ensure_int64`` before the join. ``sort`` is
    forwarded to the factorization and to ``algos.left_outer_join``.
    """
    lkey, rkey, n_groups = _factorize_keys(join_key, index, sort=sort)

    lkey_i8 = com._ensure_int64(lkey)
    rkey_i8 = com._ensure_int64(rkey)

    left_indexer, right_indexer = algos.left_outer_join(
        lkey_i8, rkey_i8, n_groups, sort=sort
    )
    return left_indexer, right_indexer
def _get_single_indexer(join_key, index, sort=False):
    """
    Left-outer-join ``join_key`` with ``index`` and return the pair
    ``(left_indexer, right_indexer)``.

    The keys produced by ``_factorize_keys`` are run through
    ``com._ensure_int64`` and joined with ``algos.left_outer_join``;
    ``sort`` is passed along to both steps.
    """
    lcodes, rcodes, ngroups = _factorize_keys(join_key, index, sort=sort)

    # left key is converted first, then the right one
    lcodes, rcodes = map(com._ensure_int64, (lcodes, rcodes))

    lidx, ridx = algos.left_outer_join(lcodes, rcodes, ngroups, sort=sort)
    return lidx, ridx
def _get_multiindex_indexer(join_keys, index, sort=False):
    """
    Return ``(left_indexer, right_indexer)`` for a left outer join of
    ``join_keys`` against the multi-level ``index``.

    Each level of ``index`` is factorized with its matching join key;
    the labels are folded into one group key per side by
    ``get_group_index``, factorized again, and joined.

    NOTE: ``sort`` is accepted but never consulted; every call below
    passes ``sort=False``.
    """
    per_level = [
        _factorize_keys(level, key, sort=False)
        for level, key in zip(index.levels, join_keys)
    ]
    # only the second label array and the group count are used per level
    labels = [rlab for _, rlab, _ in per_level]
    shape = [count for _, _, count in per_level]

    lgroup = get_group_index(labels, shape)
    rgroup = get_group_index(index.labels, shape)

    lgroup, rgroup, max_groups = _factorize_keys(lgroup, rgroup, sort=False)

    left_indexer, right_indexer = algos.left_outer_join(
        com._ensure_int64(lgroup),
        com._ensure_int64(rgroup),
        max_groups,
        sort=False,
    )
    return left_indexer, right_indexer
def test_cython_right_outer_join(self):
    """Right outer join via ``algos.left_outer_join`` with the operands
    swapped, compared against hand-written expected indexers."""

    def to_row_numbers(order, positions):
        # translate positions in the sorted keys to original row numbers,
        # keeping -1 as the "no match" marker
        rows = order.take(positions)
        rows[positions == -1] = -1
        return rows

    left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
    right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
    max_group = 5

    # swapped operands: every row of `right` is kept
    rs, ls = algos.left_outer_join(right, left, max_group)

    sorted_left = left.argsort(kind='mergesort')
    sorted_right = right.argsort(kind='mergesort')

    # sorted right keys are 0, 1, 1, 1, 2, 2, 4; each of the first six
    # matches three left rows, and key 4 matches none (-1)
    li = a_([0, 1, 2,
             3, 4, 5, 3, 4, 5, 3, 4, 5,
             6, 7, 8, 6, 7, 8,
             -1])
    ri = a_([0, 0, 0,
             1, 1, 1, 2, 2, 2, 3, 3, 3,
             4, 4, 4, 5, 5, 5,
             6])

    want_ls = to_row_numbers(sorted_left, li)
    want_rs = to_row_numbers(sorted_right, ri)

    self.assert_numpy_array_equal(ls, want_ls, check_dtype=False)
    self.assert_numpy_array_equal(rs, want_rs, check_dtype=False)
def test_left_outer_join_bug():
    """Unsorted left outer join of a long left key array against a
    two-element right key array."""
    left = np.array(
        [0, 1, 0, 1, 1, 2, 3, 1, 0, 2, 1, 2, 0, 1, 1, 2, 3, 2, 3, 2,
         1, 1, 3, 0, 3, 2, 3, 0, 0, 2, 3, 2, 0, 3, 1, 3, 0, 1, 3, 0,
         0, 1, 0, 3, 1, 0, 1, 0, 1, 1, 0, 2, 2, 2, 2, 2, 0, 3, 1, 2,
         0, 0, 3, 1, 3, 2, 2, 0, 1, 3, 0, 2, 3, 2, 3, 3, 2, 3, 3, 1,
         3, 2, 0, 0, 3, 1, 1, 1, 0, 2, 3, 3, 1, 2, 0, 3, 1, 2, 0, 2],
        dtype=np.int64,
    )
    right = np.array([3, 1], dtype=np.int64)
    max_groups = 4

    lidx, ridx = algos.left_outer_join(left, right, max_groups, sort=False)

    n_rows = len(left)

    # with sort=False the left rows are expected in their original order
    want_lidx = np.arange(n_rows)

    # keys 0 and 2 are absent from `right` (-1); key 3 is at right
    # position 0 and key 1 at right position 1
    want_ridx = -np.ones(n_rows)
    want_ridx[left == 1] = 1
    want_ridx[left == 3] = 0

    assert np.array_equal(lidx, want_lidx)
    assert np.array_equal(ridx, want_ridx)
def test_left_outer_join_bug():
    """``_algos.left_outer_join`` with ``sort=False`` must keep the left
    rows in place and point each one at its right-hand match, or -1."""
    left = np.array([
        0, 1, 0, 1, 1, 2, 3, 1, 0, 2,
        1, 2, 0, 1, 1, 2, 3, 2, 3, 2,
        1, 1, 3, 0, 3, 2, 3, 0, 0, 2,
        3, 2, 0, 3, 1, 3, 0, 1, 3, 0,
        0, 1, 0, 3, 1, 0, 1, 0, 1, 1,
        0, 2, 2, 2, 2, 2, 0, 3, 1, 2,
        0, 0, 3, 1, 3, 2, 2, 0, 1, 3,
        0, 2, 3, 2, 3, 3, 2, 3, 3, 1,
        3, 2, 0, 0, 3, 1, 1, 1, 0, 2,
        3, 3, 1, 2, 0, 3, 1, 2, 0, 2,
    ], dtype=np.int64)
    right = np.array([3, 1], dtype=np.int64)
    max_groups = 4

    lidx, ridx = _algos.left_outer_join(left, right, max_groups, sort=False)

    expected_left = np.arange(left.size)

    # right position for each left key value 0..3:
    # 0 -> none, 1 -> position 1, 2 -> none, 3 -> position 0
    position_by_key = np.array([-1.0, 1.0, -1.0, 0.0])
    expected_right = position_by_key[left]

    assert np.array_equal(lidx, expected_left)
    assert np.array_equal(ridx, expected_right)
def _get_multiindex_indexer(join_keys, index, sort=False):
    """
    Left-outer-join ``join_keys`` with the multi-level ``index`` and
    return ``(left_indexer, right_indexer)``.

    Per-level factorization supplies the label arrays and group counts
    that ``get_group_index`` folds into a single key per side; those
    keys are factorized once more and joined.

    NOTE: the ``sort`` argument is not read anywhere in this function;
    all factorizations and the join run with ``sort=False``.
    """
    level_labels = []
    level_sizes = []
    for level, key in zip(index.levels, join_keys):
        # the first returned label array is not needed here
        _, key_labels, n_groups = _factorize_keys(level, key, sort=False)
        level_labels.append(key_labels)
        level_sizes.append(n_groups)

    left_key = get_group_index(level_labels, level_sizes)
    right_key = get_group_index(index.labels, level_sizes)

    left_key, right_key, max_groups = _factorize_keys(
        left_key, right_key, sort=False
    )

    left_i8 = com._ensure_int64(left_key)
    right_i8 = com._ensure_int64(right_key)

    left_indexer, right_indexer = algos.left_outer_join(
        left_i8, right_i8, max_groups, sort=False
    )
    return left_indexer, right_indexer
def _right_outer_join(x, y, max_groups):
    """
    Right outer join of ``x`` and ``y``, expressed as a left outer join
    with the operands swapped.

    Returns the indexers in ``(x, y)`` order, i.e. the pair returned by
    ``_algos.left_outer_join(y, x, max_groups)`` with its two elements
    exchanged.
    """
    indexer_for_y, indexer_for_x = _algos.left_outer_join(y, x, max_groups)
    return indexer_for_x, indexer_for_y