def check_int_float(N):
    """Check coargsort on key lists that mix float64 and int64 arrays.

    Draws one float64 and one int64 array of length ``N`` with
    ``ak.randint(0, 2**63, ...)`` and asserts that, for both orderings of the
    two keys, the array listed first is sorted after applying the permutation.
    """
    floats = ak.randint(0, 2**63, N, dtype=ak.float64)
    ints = ak.randint(0, 2**63, N, dtype=ak.int64)

    # The array listed first is the one asserted to come out sorted.
    for keys in ([floats, ints], [ints, floats]):
        order = ak.coargsort(keys)
        assert ak.is_sorted(keys[0][order])
def check_correctness(dtype):
    """Check that coargsort sorts a random key paired with an all-zero key.

    Parameters
    ----------
    dtype : str
        Either ``'int64'`` or ``'float64'``.

    Raises
    ------
    ValueError
        If ``dtype`` is not one of the supported names.
    AssertionError
        If the random key is not sorted after applying the permutation.
    """
    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64)
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError("Unsupported dtype: {!r}".format(dtype))
    z = ak.zeros(N, dtype=dtype)

    # The zero key carries no ordering information, so `a` must end up sorted
    # whether it is listed before or after `z`.
    for keys in ([a, z], [z, a]):
        perm = ak.coargsort(keys)
        assert ak.is_sorted(a[perm])
def time_ak_coargsort(N_per_locale, trials, dtype, seed):
    """Time ``ak.coargsort`` on 1, 2, 8 and 16 key arrays and print the results.

    Parameters
    ----------
    N_per_locale : int
        Elements per locale; multiplied by the server's ``numLocales`` to get
        the total element count, which is then split across the key arrays.
    trials : int
        Number of timed ``ak.coargsort`` calls per array count.
    dtype : str
        ``'int64'``, ``'float64'`` or ``'str'``.
    seed : int or None
        Base seed; array ``k`` is generated with ``seed + k``. ``None`` passes
        ``seed=None`` for every array.
    """
    print(f">>> arkouda {dtype} coargsort")
    config = ak.get_config()
    N = N_per_locale * config["numLocales"]
    print(f"numLocales = {config['numLocales']}, N = {N:,}")

    for numArrays in (1, 2, 8, 16):
        if seed is None:
            seeds = [None] * numArrays
        else:
            seeds = [seed + k for k in range(numArrays)]

        if dtype == 'int64':
            arrs = [ak.randint(0, 2**32, N // numArrays, seed=s) for s in seeds]
            nbytes = sum(arr.size * arr.itemsize for arr in arrs)
        elif dtype == 'float64':
            arrs = [
                ak.randint(0, 1, N // numArrays, dtype=ak.float64, seed=s)
                for s in seeds
            ]
            nbytes = sum(arr.size * arr.itemsize for arr in arrs)
        elif dtype == 'str':
            arrs = [
                ak.random_strings_uniform(1, 8, N // numArrays, seed=s)
                for s in seeds
            ]
            # For strings the byte count comes from the `.bytes` attribute.
            nbytes = sum(arr.bytes.size * arr.bytes.itemsize for arr in arrs)

        elapsed = []
        for _ in range(trials):
            t_start = time.time()
            perm = ak.coargsort(arrs)
            t_stop = time.time()
            elapsed.append(t_stop - t_start)
        tavg = sum(elapsed) / trials

        # Apply the last permutation to the first key; sortedness is only
        # asserted for the numeric dtypes.
        leading = arrs[0][perm]
        if dtype in ('int64', 'float64'):
            assert ak.is_sorted(leading)

        print(f"{numArrays}-array Average time = {tavg:.4f} sec")
        bytes_per_sec = nbytes / tavg
        print(f"{numArrays}-array Average rate = {bytes_per_sec / 2**30:.4f} GiB/sec")
def time_ak_coargsort(N_per_locale, trials, dtype):
    """Time ``ak.coargsort`` on 1, 2, 8 and 16 key arrays and print the results.

    Parameters
    ----------
    N_per_locale : int
        Elements per locale; multiplied by the server's ``numLocales`` to get
        the total element count, which is then split across the key arrays.
    trials : int
        Number of timed ``ak.coargsort`` calls per array count.
    dtype : str
        ``'int64'`` or ``'float64'``.
    """
    print(">>> arkouda coargsort")
    config = ak.get_config()
    N = N_per_locale * config["numLocales"]
    print(f"numLocales = {config['numLocales']}, N = {N:,}")

    for numArrays in (1, 2, 8, 16):
        if dtype == 'int64':
            arrs = [ak.randint(0, 2**32, N // numArrays) for _ in range(numArrays)]
        elif dtype == 'float64':
            arrs = [
                ak.randint(0, 1, N // numArrays, dtype=ak.float64)
                for _ in range(numArrays)
            ]

        elapsed = []
        for _ in range(trials):
            t_start = time.time()
            perm = ak.coargsort(arrs)
            t_stop = time.time()
            elapsed.append(t_stop - t_start)
        tavg = sum(elapsed) / trials

        # Apply the last permutation to the first key and check it is sorted.
        leading = arrs[0][perm]
        assert ak.is_sorted(leading)

        print(f"{numArrays}-array Average time = {tavg:.4f} sec")
        total_bytes = sum(arr.size * arr.itemsize for arr in arrs)
        bytes_per_sec = total_bytes / tavg
        print(f"{numArrays}-array Average rate = {bytes_per_sec / 2**30:.4f} GiB/sec")
def check_correctness(dtype, seed):
    """Check coargsort on a random key paired with a constant key.

    Parameters
    ----------
    dtype : str
        ``'int64'``, ``'float64'`` or ``'str'``.
    seed : int or None
        Seed forwarded to the random array generator.

    Raises
    ------
    ValueError
        If ``dtype`` is not one of the supported names.
    AssertionError
        If, for a numeric dtype, the random key is not sorted after applying
        the permutation.
    """
    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
        z = ak.zeros(N, dtype=dtype)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
        z = ak.zeros(N, dtype=dtype)
    elif dtype == 'str':
        a = ak.random_strings_uniform(1, 16, N, seed=seed)
        # Constant string key built by casting a zero array to 'str'.
        z = ak.cast(ak.zeros(N), 'str')
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError("Unsupported dtype: {!r}".format(dtype))

    # Sortedness is only asserted for numeric keys; for strings the calls are
    # still made but their result is not checked.
    verify_sorted = dtype in ('int64', 'float64')
    for keys in ([a, z], [z, a]):
        perm = ak.coargsort(keys)
        if verify_sorted:
            assert ak.is_sorted(a[perm])
def check_float(N):
    """Check coargsort on float64 keys of length ``N``.

    Uses one array drawn with ``ak.randint(0, 1, ...)``, one drawn with
    ``ak.randint(-1, 1, ...)`` and an all-zero array, and asserts for several
    key combinations that the expected array is sorted after permuting.
    """
    unit = ak.randint(0, 1, N, dtype=ak.float64)
    signed = ak.randint(-1, 1, N, dtype=ak.float64)
    zeros = ak.zeros(N, dtype=ak.float64)

    # (keys handed to coargsort, array that must come out sorted)
    cases = (
        ([unit], unit),
        ([unit, signed], unit),
        ([signed, unit], signed),
        ([zeros, unit], unit),
        ([zeros, signed], signed),
    )
    for keys, expected_sorted in cases:
        order = ak.coargsort(keys)
        assert ak.is_sorted(expected_sorted[order])
def check_coargsort(N):
    """Compare ``ak.coargsort`` with ``np.lexsort`` on a reversed range.

    Both sides build ``arange(N)`` reversed, sort it using itself twice as the
    key list, and the sorted results are compared element-wise. The outcome
    is reported through ``pass_fail``.
    """
    # NumPy reference result
    expected = np.arange(N)[::-1]
    expected = expected[np.lexsort([expected, expected])]

    # arkouda result
    actual = ak.arange(N)[::-1]
    actual = actual[ak.coargsort([actual, actual])]

    matches = expected == actual.to_ndarray()
    return pass_fail(matches.all())
def gen_rmat_edges(lgNv, Ne_per_v, p, perm=False):
    """Generate random edge endpoint arrays (R-MAT style, per the function name).

    Parameters
    ----------
    lgNv : int
        Base-2 logarithm of the vertex count (``Nv = 2**lgNv``).
    Ne_per_v : int
        Edges per vertex; the edge count is ``Ne_per_v * Nv``.
    p : float
        Probability ``a``; the remaining ``1 - a`` is split equally into
        ``b``, ``c`` and ``d``.
    perm : bool, optional
        When true, vertices are renamed through a random permutation after
        the edges have been sorted.

    Returns
    -------
    tuple
        ``(ii, jj)``: the first and second endpoint arrays.
    """
    Nv = 2**lgNv          # number of vertices
    Ne = Ne_per_v * Nv    # number of edges

    # probabilities
    a = p
    b = c = d = (1.0 - a) / 3.0

    # edge arrays start at one; bits are added on top in the loop below
    src = ak.ones(Ne, dtype=ak.int64)
    dst = ak.ones(Ne, dtype=ak.int64)

    # quantities used in the edge generation loop
    ab = a + b
    c_norm = c / (c + d)
    a_norm = a / (a + b)

    # generate edges one bit at a time
    for ib in range(1, lgNv):
        weight = 2**(ib - 1)
        src_bit = ak.randint(0, 1, Ne, dtype=ak.float64) > ab
        # The threshold for the second endpoint depends on the first
        # endpoint's bit: c_norm where it is set, a_norm where it is not.
        draw = ak.randint(0, 1, Ne, dtype=ak.float64)
        dst_bit = draw > (c_norm * src_bit + a_norm * (~src_bit))
        src = src + (weight * src_bit)
        dst = dst + (weight * dst_bit)

    # sort the edges on both endpoints with coargsort, then permute each
    # endpoint array into that order
    order = ak.coargsort((src, dst))
    src = src[order]
    dst = dst[order]

    # optionally permute/rename the vertices
    if perm:
        # argsort of random floats yields the new vertex numbers (names)
        relabel = ak.argsort(ak.randint(0, 1, Nv, dtype=ak.float64))
        src = relabel[src]
        dst = relabel[dst]

    # NOTE: self-loops are not removed.
    return (src, dst)
def invert_permutation(perm):
    """
    Compute the inverse of a permutation array.

    Parameters
    ----------
    perm : ak.pdarray
        The permutation array.

    Returns
    -------
    ak.pdarray
        The result of ``ak.coargsort`` on ``perm`` paired with
        ``ak.arange(0, perm.size)``, i.e. the inverse permutation.

    Raises
    ------
    ValueError
        If ``perm`` contains repeated values.
    """
    # A sum-based validity check (sum == n * (n - 1) / 2) was considered but
    # is suspected to overflow on large arrays, so only uniqueness is tested.
    n_entries = perm.size
    has_duplicates = ak.unique(perm).size != n_entries
    if has_duplicates:
        raise ValueError("The array is not a permutation.")

    positions = ak.arange(0, n_entries)
    return ak.coargsort([perm, positions])
def invert_permutation(perm):
    """
    Find the inverse of a permutation array.

    Parameters
    ----------
    perm : ak.pdarray
        The permutation array.

    Returns
    -------
    ak.pdarray
        The inverse of the permutation array.

    Raises
    ------
    ValueError
        If ``perm`` has repeated values or its values do not cover a gap-free
        range of ``perm.size`` consecutive integers.
    """
    # A permutation needs BOTH distinct entries AND a gap-free value range, so
    # failing either test is enough to reject the input. (Joining the two
    # failure tests with `and` would accept e.g. [0, 0, 0, 3] or [0, 2, 3].)
    # The span test is evaluated first so the unique() call is skipped when
    # the range already rules the input out.
    span = perm.max() - perm.min()
    if perm.size != span + 1 or ak.unique(perm).size != perm.size:
        raise ValueError("The array is not a permutation.")
    return ak.coargsort([perm, ak.arange(0, perm.size)])
def coargsort(self, keys, ascending=True):
    """
    Compute the permutation that orders this dataframe by `keys`.

    Parameters
    ----------
    keys : list
        The keys to sort on; each one is looked up with ``self[key]``.
    ascending : bool, optional
        When false, the ascending permutation is returned reversed.

    Returns
    -------
    ak.pdarray
        The permutation array that sorts the data on `keys`. An empty int64
        array when the dataframe is empty.
    """
    if self._empty:
        return ak.array([], dtype=ak.int64)

    columns = [self[key] for key in keys]
    order = ak.coargsort(columns)
    if ascending:
        return order

    # Descending order: index the permutation with size-1, size-2, ..., 0.
    return order[ak.arange(self.size - 1, -1, -1)]
def do_argsort(data, algo):
    """Sort ``data`` with ``algo`` using the matching arkouda routine.

    A single ``ak.pdarray`` or ``ak.Strings`` goes to ``ak.argsort``; anything
    else (presumably a sequence of arrays) goes to ``ak.coargsort``.
    """
    is_single_array = isinstance(data, (ak.pdarray, ak.Strings))
    sorter = ak.argsort if is_single_array else ak.coargsort
    return sorter(data, algo)
def check_int(N):
    """Check coargsort on integer keys drawn from several value ranges.

    Three groups of random arrays are generated (bounds around 2**16, 2**32
    and 2**64, each with a negative-capable variant). For each key list the
    array named alongside it is asserted to be sorted after permuting.
    Finally every ordering of one array from each group is checked.
    """
    from itertools import permutations

    def expect_sorted(keys, leader):
        # Sort by `keys` and require `leader` to come out in order.
        order = ak.coargsort(keys)
        assert ak.is_sorted(leader[order])

    z = ak.zeros(N, dtype=ak.int64)

    # --- values below 2**16 ---
    a2 = ak.randint(0, 2**16, N)
    b2 = ak.randint(0, 2**16, N)
    c2 = ak.randint(0, 2**16, N)
    d2 = ak.randint(0, 2**16, N)
    n2 = ak.randint(-(2**15), 2**15, N)
    # Leading all-zero keys carry no ordering information, so the first
    # non-constant key is the one expected to be sorted.
    small_cases = (
        ([a2], a2),
        ([n2], n2),
        ([a2, b2, c2, d2], a2),
        ([z, b2, c2, d2], b2),
        ([z, z, c2, d2], c2),
        ([z, z, z, d2], d2),
    )
    for keys, leader in small_cases:
        expect_sorted(keys, leader)

    # --- values below 2**32 ---
    a4 = ak.randint(0, 2**32, N)
    b4 = ak.randint(0, 2**32, N)
    n4 = ak.randint(-(2**31), 2**31, N)
    medium_cases = (
        ([a4], a4),
        ([n4], n4),
        ([a4, b4], a4),
        ([b4, a4], b4),
    )
    for keys, leader in medium_cases:
        expect_sorted(keys, leader)

    # --- bounds up to 2**64 ---
    a8 = ak.randint(0, 2**64, N)
    b8 = ak.randint(0, 2**64, N)
    n8 = ak.randint(-(2**63), 2**64, N)
    large_cases = (
        ([a8], a8),
        ([n8], n8),
        ([b8, a8], b8),
    )
    for keys, leader in large_cases:
        expect_sorted(keys, leader)

    # Every ordering of one array per group; the first key must be sorted.
    for combo in permutations([a2, a4, a8]):
        expect_sorted(combo, combo[0])
def check_large(N):
    """Check coargsort on ten random keys of length ``N``.

    Each key is drawn with ``ak.randint(0, 2**63, N)``; the first key must be
    sorted after applying the resulting permutation.
    """
    key_count = 10
    keys = [ak.randint(0, 2**63, N) for _ in range(key_count)]
    order = ak.coargsort(keys)
    first_key_sorted = keys[0][order]
    assert ak.is_sorted(first_key_sorted)
def argsort(self, ascending=True):
    """Return the permutation that sorts by ``self.index``.

    Parameters
    ----------
    ascending : bool, optional
        When false, the ascending permutation is returned reversed.

    Returns
    -------
    The permutation produced by ``ak.coargsort(self.index)``, reversed when
    ``ascending`` is false.
    """
    # NOTE(review): presumably self.index is a list of key arrays; confirm.
    order = ak.coargsort(self.index)
    if ascending:
        return order

    # Descending order: index the permutation with size-1, size-2, ..., 0.
    return order[ak.arange(self.size - 1, -1, -1)]