def test_dtypes(self):
    """
    popcount must raise TypeError for float64 and bool arrays.

    Both entry points are exercised for each unsupported dtype: the
    pdarray method (``arr.popcount()``) and the module-level function
    (``ak.popcount(arr)``).
    """
    f = ak.zeros(10, dtype=ak.float64)
    b = ak.zeros(10, dtype=ak.bool)
    # The bool array used to be created but never checked; cover it too.
    for unsupported in (f, b):
        with self.assertRaises(TypeError):
            unsupported.popcount()
        with self.assertRaises(TypeError):
            ak.popcount(unsupported)
def test_zeros(self):
    """
    Exercise ak.zeros across the dtypes it accepts and the ones it rejects.

    Checks the result type and dtype for int64, the dtype for Python and
    Arkouda float/bool spellings, that a size given as the string '5'
    yields five elements, and that uint8 and str dtypes raise TypeError.
    """
    int_zeros = ak.zeros(5, dtype=ak.int64)
    self.assertIsInstance(int_zeros, ak.pdarray)
    self.assertEqual(ak.int64, int_zeros.dtype)

    # Each requested dtype must be reported back unchanged by the result.
    for requested in (float, ak.float64, bool, ak.bool):
        produced = ak.zeros(5, dtype=requested)
        self.assertEqual(requested, produced.dtype)

    sized_by_string = ak.zeros('5')
    self.assertEqual(5, len(sized_by_string))

    for rejected in (ak.uint8, str):
        with self.assertRaises(TypeError):
            ak.zeros(5, dtype=rejected)
def run_test_pdarray_index(strings, test_strings, cat):
    """
    Check pdarray-based indexing of ``strings`` and ``cat`` against ``test_strings``.

    Parameters
    ----------
    strings
        Object indexed with integer and boolean pdarrays; the singleton
        checks below expect the results to be ``ak.Strings`` instances.
    test_strings
        Reference data indexed with the ndarray form of the same indices.
    cat
        Object indexed with the same integer pdarray as ``strings``.

    Raises
    ------
    AssertionError
        If any indexed result differs from the reference or has the wrong
        type or size.
    """
    inds = ak.arange(0, strings.size, 10)
    np_inds = inds.to_ndarray()
    assert compare_strings(strings[inds].to_ndarray(), test_strings[np_inds])
    assert compare_strings(cat[inds].to_ndarray(), test_strings[np_inds])

    logical = ak.zeros(strings.size, dtype=ak.bool)
    logical[inds] = True
    assert compare_strings(strings[logical].to_ndarray(),
                           test_strings[logical.to_ndarray()])

    # Indexing with a one-element pdarray (int) should return Strings array,
    # not string scalar. The probe position is derived from the argument
    # itself so the check does not depend on a module-level size constant.
    i = strings.size // 2
    singleton = ak.array([i])
    result = strings[singleton]
    assert isinstance(result, ak.Strings) and result.size == 1
    assert result[0] == strings[i]

    # Logical indexing with all-False array should return empty Strings array
    logicalSingleton = ak.zeros(strings.size, dtype=ak.bool)
    result = strings[logicalSingleton]
    assert isinstance(result, ak.Strings) and result.size == 0

    # Logical indexing with a single True should return one-element Strings
    # array, not string scalar
    logicalSingleton[i] = True
    result = strings[logicalSingleton]
    assert isinstance(result, ak.Strings) and result.size == 1
    assert result[0] == strings[i]
def compare_strategies(length, ncat, op, dtype):
    """
    Time GroupBy construction and aggregation for two GroupBy settings.

    Random integer keys in ``[0, ncat)`` are generated, then values are
    built according to ``dtype``: random ints for ``'int64'``, a bool array
    with ``ncat // 2`` randomly chosen positions set True for ``'bool'``,
    and a linspace from -1 to 1 otherwise. ``ak.GroupBy`` is built with its
    second positional argument False (reported as "Global") and True
    (reported as "Local"), and ``op`` is aggregated over the values with
    each. Timings and a comparison of the two results are printed.

    Returns
    -------
    tuple
        ``(ggtime, grtime, lgtime, lrtime)``: the elapsed times of the
        global groupby, global reduce, local groupby and local reduce.
    """
    def _timed(label, action):
        # Print the label, run the action, print and return its duration.
        print(label, end=' ')
        began = time()
        outcome = action()
        elapsed = time() - began
        print(elapsed)
        return outcome, elapsed

    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length//ncat, length)
    elif dtype == 'bool':
        vals = ak.zeros(length, dtype='bool')
        for position in np.random.randint(0, length, ncat//2):
            vals[position] = True
    else:
        vals = ak.linspace(-1, 1, length)

    gg, ggtime = _timed("Global groupby", lambda: ak.GroupBy(keys, False))
    (gk, gv), grtime = _timed("Global reduce", lambda: gg.aggregate(vals, op))
    lg, lgtime = _timed("Local groupby", lambda: ak.GroupBy(keys, True))
    (lk, lv), lrtime = _timed("Local reduce", lambda: lg.aggregate(vals, op))

    print(f"Keys match? {(gk == lk).all()}")
    print(f"Absolute diff of vals = {ak.abs(gv - lv).sum()}")
    return ggtime, grtime, lgtime, lrtime
def testSaveAndLoadCategoricalMulti(self):
    """
    Round-trip a pseudo dataframe through ak.save_all / ak.load_all.

    The dataframe is a dict holding two categoricals, one pdarray and one
    strings object. After writing it under a temporary directory and
    loading it back, the test expects four entries whose contents match
    the originals when compared as unordered collections.
    """
    def as_list(array_like):
        # Bring the server-side object back as a plain Python list.
        return array_like.to_ndarray().tolist()

    c1 = self._getCategorical(prefix="c1", size=51)
    c2 = self._getCategorical(prefix="c2", size=52)
    pda1 = ak.zeros(51)
    strings1 = ak.random_strings_uniform(9, 10, 52)

    with tempfile.TemporaryDirectory(
            dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
        df = {"cat1": c1, "cat2": c2, "pda1": pda1, "strings1": strings1}
        ak.save_all(df, f"{tmp_dirname}/cat-save-test")
        x = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
        self.assertTrue(len(x.items()) == 4)

        # assertCountEqual passes when both sides hold the same elements
        # with the same multiplicities, in any order.
        self.assertCountEqual(as_list(x["cat1"].categories),
                              as_list(c1.categories))
        self.assertCountEqual(as_list(x["cat2"].categories),
                              as_list(c2.categories))
        self.assertCountEqual(as_list(x["pda1"]), as_list(pda1))
        self.assertCountEqual(as_list(x["strings1"]), as_list(strings1))
def test_zero_length_groupby(self):
    """
    Boundary case: GroupBy over a zero-length int64 pdarray (see Issue #900).

    There is no explicit assertion; the test passes as long as building
    the GroupBy and rendering its segments as a string do not raise.
    """
    empty_keys = ak.zeros(0, dtype=ak.int64)
    grouping = ak.GroupBy(empty_keys)
    # This conversion is the passing condition of the test; do not remove.
    str(grouping.segments)
def check_zeros(N):
    """
    Compare ak.zeros(N) with np.zeros(N) element by element.

    Returns whatever ``pass_fail`` produces for the boolean "all elements
    equal" outcome.
    """
    expected = np.zeros(N)
    candidate = ak.zeros(N)
    elementwise_equal = expected == candidate.to_ndarray()
    return pass_fail(elementwise_equal.all())
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """
    Build a random key array and a matching value array of ``length`` items.

    Keys are random integers in ``[0, nkeys)``. Values depend on ``dtype``:
    ``'int64'`` gives random integers in ``[0, nvals)``; ``'bool'`` gives an
    all-False array with ``nkeys // 2`` randomly drawn positions (repeats
    possible) set to True; anything else gives a linspace from -1 to 1.

    Returns
    -------
    tuple
        ``(keys, vals)``
    """
    keys = ak.randint(0, nkeys, length)

    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)
    if dtype != 'bool':
        return keys, ak.linspace(-1, 1, length)

    flags = ak.zeros(length, dtype='bool')
    for position in np.random.randint(0, length, nkeys // 2):
        flags[position] = True
    return keys, flags
def run_test_in1d(strings, cat, base_words):
    """
    Check ak.in1d on ``strings`` and ``cat`` against a random word sample.

    One hundred indices in ``[0, UNIQUE)`` select the target words from
    ``base_words``. The membership masks for ``strings`` and ``cat`` must
    agree, every matched string must be one of the targets, and the mask
    must equal the one built by comparing against each target in turn.
    """
    sample_idx = ak.randint(0, UNIQUE, 100)
    akwords = base_words[sample_idx]
    more_words = akwords.to_ndarray()

    matches = ak.in1d(strings, akwords)
    catmatches = ak.in1d(cat, akwords)
    assert (matches == catmatches).all()

    # Every word in matches should be in the target set
    matched_words = strings[matches].to_ndarray()
    assert all(word in more_words for word in matched_words)

    # Exhaustively find all matches to make sure we didn't miss any
    brute_force = ak.zeros(strings.size, dtype=ak.bool)
    for target in more_words:
        brute_force |= (strings == target)
    assert (brute_force == matches).all()
def test_eros_like(self):
    """
    ak.zeros_like must preserve the dtype of its template array.

    Covers int64 (also checking the result is a pdarray), float64 and
    bool templates.

    NOTE(review): the method name looks like a typo for
    ``test_zeros_like``; it is kept unchanged so the test id stays stable.
    """
    intZeros = ak.zeros(5, dtype=ak.int64)
    intZerosLike = ak.zeros_like(intZeros)
    self.assertIsInstance(intZerosLike, ak.pdarray)
    self.assertEqual(ak.int64, intZerosLike.dtype)

    # The float and bool cases previously went through ak.ones/ak.ones_like,
    # leaving zeros_like untested for those dtypes.
    floatZeros = ak.zeros(5, dtype=ak.float64)
    floatZerosLike = ak.zeros_like(floatZeros)
    self.assertEqual(ak.float64, floatZerosLike.dtype)

    boolZeros = ak.zeros(5, dtype=ak.bool)
    boolZerosLike = ak.zeros_like(boolZeros)
    self.assertEqual(ak.bool, boolZerosLike.dtype)
def check_float(N):
    """
    Check ak.coargsort on float64 key arrays of length ``N``.

    For each key list, the permutation returned by ak.coargsort must leave
    the designated array sorted: the first key, or the second key when the
    first key is the all-zero array.
    """
    a = ak.randint(0, 1, N, dtype=ak.float64)
    n = ak.randint(-1, 1, N, dtype=ak.float64)
    z = ak.zeros(N, dtype=ak.float64)

    # (keys handed to coargsort, array that must come out sorted)
    cases = (
        ([a], a),
        ([a, n], a),
        ([n, a], n),
        ([z, a], a),
        ([z, n], n),
    )
    for key_arrays, must_be_sorted in cases:
        order = ak.coargsort(key_arrays)
        assert ak.is_sorted(must_be_sorted[order])
def check_int(N):
    """
    Check ak.coargsort on integer key arrays of length ``N``.

    Random arrays are drawn with bounds of 2**16, 2**32 and 2**64 (plus
    signed variants). For each key list, the permutation returned by
    ak.coargsort must leave the first non-constant key sorted. Finally,
    every ordering of the three unsigned-range arrays is checked.
    """
    from itertools import permutations

    def _expect_sorted(key_arrays, must_be_sorted):
        # Co-sort by key_arrays and require must_be_sorted to end up ordered.
        order = ak.coargsort(key_arrays)
        assert ak.is_sorted(must_be_sorted[order])

    z = ak.zeros(N, dtype=ak.int64)

    # Values bounded by 2**16
    a2 = ak.randint(0, 2**16, N)
    b2 = ak.randint(0, 2**16, N)
    c2 = ak.randint(0, 2**16, N)
    d2 = ak.randint(0, 2**16, N)
    n2 = ak.randint(-(2**15), 2**15, N)
    for key_arrays, must_be_sorted in (
        ([a2], a2),
        ([n2], n2),
        ([a2, b2, c2, d2], a2),
        ([z, b2, c2, d2], b2),
        ([z, z, c2, d2], c2),
        ([z, z, z, d2], d2),
    ):
        _expect_sorted(key_arrays, must_be_sorted)

    # Values bounded by 2**32
    a4 = ak.randint(0, 2**32, N)
    b4 = ak.randint(0, 2**32, N)
    n4 = ak.randint(-(2**31), 2**31, N)
    for key_arrays, must_be_sorted in (
        ([a4], a4),
        ([n4], n4),
        ([a4, b4], a4),
        ([b4, a4], b4),
    ):
        _expect_sorted(key_arrays, must_be_sorted)

    # Values bounded by 2**64
    # NOTE(review): n8's upper bound (2**64) breaks the -(2**k)..2**k pattern
    # used for n2 and n4 — confirm whether 2**63 was intended.
    a8 = ak.randint(0, 2**64, N)
    b8 = ak.randint(0, 2**64, N)
    n8 = ak.randint(-(2**63), 2**64, N)
    for key_arrays, must_be_sorted in (
        ([a8], a8),
        ([n8], n8),
        ([b8, a8], b8),
    ):
        _expect_sorted(key_arrays, must_be_sorted)

    # Mixed ranges: whichever array leads the ordering must come out sorted.
    for ordering in permutations([a2, a4, a8]):
        _expect_sorted(ordering, ordering[0])
print("pdarray bool index passed") # in1d and iter # more_words = np.random.choice(base_words, 100) # akwords = ak.array(more_words) more_choices = ak.randint(0, UNIQUE, 100) akwords = base_words[more_choices] more_words = akwords.to_ndarray() matches = ak.in1d(strings, akwords) catmatches = ak.in1d(cat, akwords) assert ((matches == catmatches).all()) # Every word in matches should be in the target set for word in strings[matches]: assert (word in more_words) # Exhaustively find all matches to make sure we didn't miss any inds = ak.zeros(strings.size, dtype=ak.bool) for word in more_words: inds |= (strings == word) assert ((inds == matches).all()) print("in1d and iter passed") # argsort test_argsort(strings, test_strings, cat) # unique test_unique(strings, test_strings, cat) # groupby g = ak.GroupBy(strings) gc = ak.GroupBy(cat) # Unique keys should be same result as ak.unique
def test_compare_zeros(self):
    """
    ak.zeros(N) must equal np.zeros(N) in shape and in every element.
    """
    # create np version
    nArray = np.zeros(N)
    # create ak version
    aArray = ak.zeros(N)
    # Compare the arrays themselves. Reducing each side with .all() first
    # yields False for any array containing a zero, so the old check passed
    # without ever comparing the elements.
    self.assertTrue(np.array_equal(nArray, aArray.to_ndarray()))