def testPdArrayConcatenation(self):
    """Concatenating two 100-element random int arrays gives 200 int64 values."""
    pieces = [ak.randint(0, 100, 100) for _ in range(2)]
    combined = ak.concatenate(pieces)

    self.assertEqual(200, len(combined))
    self.assertEqual(np.int64, combined.dtype)
def testCast(self):
    """Cast each sample array to every dtype in ak.DTypes and check round trips.

    Sizes must always be preserved; for the (source, target) pairs listed in
    ``roundtripable`` casting back must reproduce the original values. A few
    fixed-value casts are checked afterwards.
    """
    count = 100
    samples = {
        ak.int64: ak.randint(-(2**48), 2**48, count),
        ak.float64: ak.randint(0, 1, count, dtype=ak.float64),
        ak.bool: ak.randint(0, 2, count, dtype=ak.bool),
    }
    # (source dtype, target dtype) pairs expected to survive a round trip.
    roundtripable = {
        (ak.bool, ak.bool),
        (ak.int64, ak.int64),
        (ak.int64, ak.float64),
        (ak.int64, npstr),
        (ak.float64, ak.float64),
        (ak.float64, npstr),
        (ak.uint8, ak.int64),
        (ak.uint8, ak.float64),
        (ak.uint8, npstr),
    }

    for src_type, original in samples.items():
        for type_name in ak.DTypes:
            dst_type = ak.dtype(type_name)
            converted = ak.cast(original, dst_type)
            self.assertEqual(original.size, converted.size)
            if (src_type, dst_type) not in roundtripable:
                continue
            restored = ak.cast(converted, src_type)
            self.assertTrue(
                (original == restored).all(),
                f"{src_type}: {original[:5]}, {dst_type}: {restored[:5]}")

    # Spot checks with known values.
    as_ints = ak.cast(ak.linspace(1, 5, 5), dt=ak.int64)
    self.assertTrue((ak.array([1, 2, 3, 4, 5]) == as_ints).all())

    as_floats = ak.cast(ak.arange(0, 5), dt=ak.float64)
    self.assertEqual(as_floats.dtype, ak.float64)

    as_bools = ak.cast(ak.linspace(0, 4, 5), dt=ak.bool)
    self.assertTrue(
        (ak.array([False, True, True, True, True]) == as_bools).all())
def compare_strategies(length, ncat, op, dtype):
    """Time GroupBy construction and aggregation for both GroupBy flag values.

    Builds random keys and values of the requested ``dtype``, then constructs
    ``ak.GroupBy(keys, False)`` and ``ak.GroupBy(keys, True)``, aggregating
    ``vals`` with ``op`` on each. Timings and a comparison of the two results
    are printed.

    Returns:
        Tuple ``(ggtime, grtime, lgtime, lrtime)``: seconds spent on the
        "global" groupby, "global" reduce, "local" groupby and "local" reduce.
    """
    def _timed(label, action):
        # Print the label, run the action, print and return the elapsed time.
        print(label, end=' ')
        begin = time()
        outcome = action()
        elapsed = time() - begin
        print(elapsed)
        return outcome, elapsed

    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length//ncat, length)
    elif dtype == 'bool':
        vals = ak.zeros(length, dtype='bool')
        # Flip a random handful of positions to True.
        for idx in np.random.randint(0, length, ncat//2):
            vals[idx] = True
    else:
        vals = ak.linspace(-1, 1, length)

    global_gb, ggtime = _timed("Global groupby", lambda: ak.GroupBy(keys, False))
    (gk, gv), grtime = _timed("Global reduce", lambda: global_gb.aggregate(vals, op))
    local_gb, lgtime = _timed("Local groupby", lambda: ak.GroupBy(keys, True))
    (lk, lv), lrtime = _timed("Local reduce", lambda: local_gb.aggregate(vals, op))

    print(f"Keys match? {(gk == lk).all()}")
    print(f"Absolute diff of vals = {ak.abs(gv - lv).sum()}")
    return ggtime, grtime, lgtime, lrtime
def testErrorHandling(self):
    """ak.sort rejects bool arrays, plain lists and Strings for every algorithm."""
    bool_message = 'ak.sort supports int64, uint64, or float64, not bool'
    # Bool arrays requested via the arkouda alias and via the builtin type.
    bool_inputs = (
        ak.randint(0, 1, 1000, dtype=ak.bool),
        ak.randint(0, 1, 1000, dtype=bool),
    )

    for algo in ak.SortingAlgorithm:
        # Sorting a bool pdarray raises ValueError with a fixed message.
        for candidate in bool_inputs:
            with self.assertRaises(ValueError) as cm:
                ak.sort(candidate, algo)
            self.assertEqual(bool_message, cm.exception.args[0])

        # Sorting something that is not a pdarray raises TypeError.
        with self.assertRaises(TypeError):
            ak.sort(list(range(0, 10)), algo)

        # Sorting a Strings object is unsupported and raises TypeError.
        with self.assertRaises(TypeError):
            ak.sort(
                ak.array(['String {}'.format(i) for i in range(0, 10)]),
                algo)
def setUp(self):
    """Create bool, float and int fixtures plus a GroupBy over the ints."""
    ArkoudaTest.setUp(self)

    self.bvalues = ak.randint(0, 1, 10, dtype=bool)
    self.fvalues = ak.randint(0, 1, 10, dtype=float)

    int_values = ak.array([4, 1, 3, 2, 2, 2, 5, 5, 2, 3])
    self.ivalues = int_values
    self.igb = ak.GroupBy(int_values)
def testSeededRNG(self):
    """Unseeded generators must differ between calls; seeded ones must agree."""
    N = 100
    seed = 8675309

    # randint for each numeric dtype.
    for dt in (ak.int64, ak.float64, ak.bool):
        first = ak.randint(0, 2**32, N, dtype=dt)
        second = ak.randint(0, 2**32, N, dtype=dt)
        self.assertFalse((first == second).all())

        first = ak.randint(0, 2**32, N, dtype=dt, seed=seed)
        second = ak.randint(0, 2**32, N, dtype=dt, seed=seed)
        self.assertTrue((first == second).all())

    # Remaining generators, each wrapped so a seed can be passed optionally.
    generators = (
        # Uniform
        lambda **kw: ak.uniform(N, **kw),
        # Standard normal
        lambda **kw: ak.standard_normal(N, **kw),
        # Strings with uniformly distributed length
        lambda **kw: ak.random_strings_uniform(1, 10, N, **kw),
        # Strings with log-normally distributed length
        lambda **kw: ak.random_strings_lognormal(2, 1, N, **kw),
    )
    for make in generators:
        self.assertFalse((make() == make()).all())
        self.assertTrue((make(seed=seed) == make(seed=seed)).all())
def check_int_float(N):
    """Check that coargsort over mixed float/int keys sorts the leading key."""
    floats = ak.randint(0, 2**63, N, dtype=ak.float64)
    ints = ak.randint(0, 2**63, N, dtype=ak.int64)

    for primary, secondary in ((floats, ints), (ints, floats)):
        order = ak.coargsort([primary, secondary])
        assert ak.is_sorted(primary[order])
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """Return a ``(keys, vals)`` pair of arkouda arrays of the given length.

    ``keys`` are random ints from ``ak.randint(0, nkeys, length)``. ``vals``
    depends on ``dtype``: random ints below ``nvals`` for ``'int64'``; a
    mostly-False bool array with randomly chosen positions set True for
    ``'bool'``; otherwise a linspace from -1 to 1.
    """
    keys = ak.randint(0, nkeys, length)

    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)

    if dtype == 'bool':
        flags = ak.zeros(length, dtype='bool')
        for position in np.random.randint(0, length, nkeys // 2):
            flags[position] = True
        return keys, flags

    return keys, ak.linspace(-1, 1, length)
def test_randint_with_seed(self):
    """randint with seed=2 must reproduce fixed int, float and bool sequences."""
    expected_ints = ak.array([4, 3, 1, 3, 4, 4, 2, 4, 3, 2])
    actual = ak.randint(1, 5, 10, seed=2)
    self.assertTrue((expected_ints == actual).all())

    expected_floats = ak.array([
        2.9160772326374946,
        4.353429832157099,
        4.5392023718621486,
        4.4019932101126606,
        3.3745324569952304,
        1.1642002901528308,
        4.4714086874555292,
        3.7098921109084522,
        4.5939589352472314,
        4.0337935981006172,
    ])
    actual = ak.randint(1, 5, 10, dtype=ak.float64, seed=2)
    self.assertTrue((expected_floats == actual).all())

    expected_bools = ak.array(
        [False, True, True, True, True, False, True, True, True, True])
    actual = ak.randint(1, 5, 10, dtype=ak.bool, seed=2)
    self.assertTrue((expected_bools == actual).all())
def check_int_uint_float(N, algo):
    """Check coargsort over float/uint/int keys with each key type leading."""
    f = ak.randint(0, 2**63, N, dtype=ak.float64)
    u = ak.randint(0, 2**63, N, dtype=ak.uint64)
    i = ak.randint(0, 2**63, N, dtype=ak.int64)

    columns = [f, u, i]
    # Rotations: [f, u, i], [u, i, f], [i, f, u]; the first key must end up sorted.
    for shift in range(len(columns)):
        rotated = columns[shift:] + columns[:shift]
        order = ak.coargsort(rotated, algo)
        assert ak.is_sorted(rotated[0][order])
def setUp(self):
    """Build random int, float and bool columns (and duplicates) for the tests.

    For the int and float columns a sorted NumPy copy is kept as well. The
    columns are exposed as dicts, a list and a list of names. Finally an empty
    ``not-a-file_LOCALE0000`` file is created in ``IOTest.io_test_dir``.
    """
    ArkoudaTest.setUp(self)

    # Integers drawn by np.random.randint(-100, 100, ...).
    self.int_tens_pdarray = ak.array(np.random.randint(-100, 100, 1000))
    self.int_tens_ndarray = self.int_tens_pdarray.to_ndarray()
    self.int_tens_ndarray.sort()
    self.int_tens_pdarray_dupe = ak.array(np.random.randint(-100, 100, 1000))

    # Integers drawn by np.random.randint(-1000, 1000, ...).
    self.int_hundreds_pdarray = ak.array(np.random.randint(-1000, 1000, 1000))
    self.int_hundreds_ndarray = self.int_hundreds_pdarray.to_ndarray()
    self.int_hundreds_ndarray.sort()
    self.int_hundreds_pdarray_dupe = ak.array(np.random.randint(-1000, 1000, 1000))

    # Floats drawn by np.random.uniform(-100, 100, ...).
    self.float_pdarray = ak.array(np.random.uniform(-100, 100, 1000))
    self.float_ndarray = self.float_pdarray.to_ndarray()
    self.float_ndarray.sort()
    self.float_pdarray_dupe = ak.array(np.random.uniform(-100, 100, 1000))

    # Bool columns generated on the arkouda side.
    self.bool_pdarray = ak.randint(0, 1, 1000, dtype=ak.bool)
    self.bool_pdarray_dupe = ak.randint(0, 1, 1000, dtype=ak.bool)

    self.names = [
        'int_tens_pdarray',
        'int_hundreds_pdarray',
        'float_pdarray',
        'bool_pdarray',
    ]
    self.list_columns = [
        self.int_tens_pdarray,
        self.int_hundreds_pdarray,
        self.float_pdarray,
        self.bool_pdarray,
    ]
    dupe_columns = [
        self.int_tens_pdarray_dupe,
        self.int_hundreds_pdarray_dupe,
        self.float_pdarray_dupe,
        self.bool_pdarray_dupe,
    ]

    self.dict_columns = dict(zip(self.names, self.list_columns))
    self.dict_columns_dupe = dict(zip(self.names, dupe_columns))
    self.dict_single_column = {'int_tens_pdarray': self.int_tens_pdarray}

    # Create (or truncate) an empty placeholder file.
    placeholder_path = '{}/not-a-file_LOCALE0000'.format(IOTest.io_test_dir)
    with open(placeholder_path, 'w'):
        pass
def setUp(self):
    """Build random int, float and bool columns (and duplicates) for the tests.

    For the int and float columns a sorted NumPy copy is kept as well. The
    columns are exposed as dicts, a list and a list of names.
    """
    ArkoudaTest.setUp(self)

    # Integers drawn by np.random.randint(-100, 100, ...).
    self.int_tens_pdarray = ak.array(np.random.randint(-100, 100, 1000))
    self.int_tens_ndarray = self.int_tens_pdarray.to_ndarray()
    self.int_tens_ndarray.sort()
    self.int_tens_pdarray_dupe = ak.array(np.random.randint(-100, 100, 1000))

    # Integers drawn by np.random.randint(-1000, 1000, ...).
    self.int_hundreds_pdarray = ak.array(np.random.randint(-1000, 1000, 1000))
    self.int_hundreds_ndarray = self.int_hundreds_pdarray.to_ndarray()
    self.int_hundreds_ndarray.sort()
    self.int_hundreds_pdarray_dupe = ak.array(np.random.randint(-1000, 1000, 1000))

    # Floats drawn by np.random.uniform(-100, 100, ...).
    self.float_pdarray = ak.array(np.random.uniform(-100, 100, 1000))
    self.float_ndarray = self.float_pdarray.to_ndarray()
    self.float_ndarray.sort()
    self.float_pdarray_dupe = ak.array(np.random.uniform(-100, 100, 1000))

    # Bool columns generated on the arkouda side.
    self.bool_pdarray = ak.randint(0, 1, 1000, dtype=ak.bool)
    self.bool_pdarray_dupe = ak.randint(0, 1, 1000, dtype=ak.bool)

    self.names = [
        'int_tens_pdarray',
        'int_hundreds_pdarray',
        'float_pdarray',
        'bool_pdarray',
    ]
    self.list_columns = [
        self.int_tens_pdarray,
        self.int_hundreds_pdarray,
        self.float_pdarray,
        self.bool_pdarray,
    ]
    dupe_columns = [
        self.int_tens_pdarray_dupe,
        self.int_hundreds_pdarray_dupe,
        self.float_pdarray_dupe,
        self.bool_pdarray_dupe,
    ]

    self.dict_columns = dict(zip(self.names, self.list_columns))
    self.dict_columns_dupe = dict(zip(self.names, dupe_columns))
    self.dict_single_column = {'int_tens_pdarray': self.int_tens_pdarray}
def testPrecision(self):
    """Grouped means of int values and their float64 cast must agree.

    See https://github.com/Bears-R-Us/arkouda/issues/964 -- grouped sum was
    exacerbating floating point errors; this test verifies the fix.
    """
    num_rows = 10**6
    num_groups = num_rows // 10
    upper_bound = 2**63 // num_rows

    group_ids = ak.randint(0, num_groups, num_rows, seed=1)
    int_values = ak.randint(0, upper_bound, num_rows, seed=2)
    float_values = ak.cast(int_values, ak.float64)

    grouping = ak.GroupBy(group_ids)
    int_means = grouping.mean(int_values)[1]
    float_means = grouping.mean(float_values)[1]

    mean_squared_error = ak.mean((int_means - float_means)**2)
    self.assertTrue(np.isclose(mean_squared_error, 0.0))
def setUp(self):
    """Create random string fixtures, a delimiter, and "gremlin" variants.

    Two batches of random printable words are concatenated into
    ``base_words``; ``strings`` is a random selection from them, with NumPy
    and Categorical views. The "gremlins" (a quote, a space and the empty
    string) are appended to produce the ``gremlins_*`` fixtures.
    """
    self.maxDiff = None
    ArkoudaTest.setUp(self)

    uniform_words = ak.random_strings_uniform(1, 10, UNIQUE,
                                              characters='printable')
    lognormal_words = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                                  characters='printable')
    np_gremlins = np.array(['"', ' ', ''])
    self.gremlins = ak.array(np_gremlins)

    self.base_words = ak.concatenate((uniform_words, lognormal_words))
    self.np_base_words = np.hstack(
        (uniform_words.to_ndarray(), lognormal_words.to_ndarray()))

    picks = ak.randint(0, self.base_words.size, N)
    self.strings = self.base_words[picks]
    self.test_strings = self.strings.to_ndarray()
    self.cat = ak.Categorical(self.strings)

    # Characters (x) and their occurrence counts (w) across all base words.
    char_counts = Counter(''.join(self.base_words.to_ndarray()))
    x, w = tuple(zip(*char_counts.items()))
    self.delim = self._get_delimiter(x, w, np_gremlins)

    self.akset = set(ak.unique(self.strings).to_ndarray())

    self.gremlins_base_words = ak.concatenate(
        (self.base_words, self.gremlins))
    self.gremlins_strings = ak.concatenate(
        (self.base_words[picks], self.gremlins))
    self.gremlins_test_strings = self.gremlins_strings.to_ndarray()
    self.gremlins_cat = ak.Categorical(self.gremlins_strings)
def testHistogram(self):
    """ak.histogram returns a 20-bin int pdarray and type-checks its arguments."""
    pda = ak.randint(10, 30, 40)
    counts = ak.histogram(pda, bins=20)

    self.assertIsInstance(counts, ak.pdarray)
    self.assertEqual(20, len(counts))
    self.assertEqual(int, counts.dtype)

    pda_message = 'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead'
    bins_message = 'type of argument "bins" must be int; got str instead'
    # (call expected to raise TypeError, expected first exception argument)
    bad_calls = (
        (lambda: ak.histogram([range(0, 10)], bins=1), pda_message),
        (lambda: ak.histogram(pda, bins='1'), bins_message),
        (lambda: ak.histogram([range(0, 10)], bins='1'), pda_message),
    )
    for call, expected in bad_calls:
        with self.assertRaises(TypeError) as cm:
            call()
        self.assertEqual(expected, cm.exception.args[0])
def testBitBoundary(self):
    """Every sorting algorithm must sort values drawn from [-2**15, 2**16)."""
    # test 17-bit sort
    lower = -2**15
    upper = 2**16
    values = ak.randint(lower, upper, 100)

    for algo in ak.SortingAlgorithm:
        ordered = ak.sort(values, algo)
        assert ak.is_sorted(ordered)
def test_parquet(self):
    """Round-trip a random int array through Parquet and compare the values.

    The array is written with ``save_parquet`` under the ``pq_testcorrect``
    prefix and read back with ``ak.read_parquet``. Every file matching
    ``pq_test*`` in the working directory is removed afterwards, even when
    the write, read or comparison fails, so a failing run does not leave
    stale files behind for later tests.
    """
    ak_arr = ak.randint(0, 2**32, SIZE)
    try:
        ak_arr.save_parquet("pq_testcorrect", "my-dset")
        pq_arr = ak.read_parquet("pq_testcorrect*", "my-dset")
        self.assertTrue((ak_arr == pq_arr).all())
    finally:
        # Always clean up, regardless of the test outcome.
        for f in glob.glob('pq_test*'):
            os.remove(f)
def check_float(N):
    """Check coargsort on float64 keys, including negatives and all zeros."""
    a = ak.randint(0, 1, N, dtype=ak.float64)
    n = ak.randint(-1, 1, N, dtype=ak.float64)
    z = ak.zeros(N, dtype=ak.float64)

    # (keys passed to coargsort, array that must come out sorted)
    cases = (
        ([a], a),
        ([a, n], a),
        ([n, a], n),
        ([z, a], a),
        ([z, n], n),
    )
    for key_columns, checked in cases:
        order = ak.coargsort(key_columns)
        assert ak.is_sorted(checked[order])
def test_wrong_dset_name(self):
    """Reading a Parquet dataset under a wrong name raises RuntimeError.

    The array is written as ``test-dset-name``; reading ``wrong-dset-name``
    must fail with a message containing
    ``wrong-dset-name does not exist in file``. Every file matching
    ``pq_test*`` is removed afterwards, even if an assertion fails, so a
    failing run does not leave stale files behind for later tests.
    """
    ak_arr = ak.randint(0, 2**32, SIZE)
    try:
        ak_arr.save_parquet("pq_test", "test-dset-name")

        with self.assertRaises(RuntimeError) as cm:
            ak.read_parquet("pq_test*", "wrong-dset-name")
        self.assertIn("wrong-dset-name does not exist in file",
                      cm.exception.args[0])
    finally:
        # Always clean up, regardless of the test outcome.
        for f in glob.glob("pq_test*"):
            os.remove(f)
def testCast(self):
    """Cast each sample array to every dtype in ak.DTypes and check round trips.

    Sizes must always be preserved; for the (source, target) pairs listed in
    ``roundtripable`` casting back must reproduce the original values.
    """
    count = 100
    samples = {
        ak.int64: ak.randint(-(2**48), 2**48, count),
        ak.float64: ak.randint(0, 1, count, dtype=ak.float64),
        ak.bool: ak.randint(0, 2, count, dtype=ak.bool),
    }
    # (source dtype, target dtype) pairs expected to survive a round trip.
    roundtripable = {
        (ak.bool, ak.bool),
        (ak.int64, ak.int64),
        (ak.int64, ak.float64),
        (ak.int64, akstr),
        (ak.float64, ak.float64),
        (ak.float64, akstr),
        (ak.uint8, ak.int64),
        (ak.uint8, ak.float64),
        (ak.uint8, akstr),
    }

    for src_type, original in samples.items():
        for type_name in ak.DTypes:
            dst_type = ak.dtype(type_name)
            converted = ak.cast(original, dst_type)
            self.assertEqual(original.size, converted.size)
            if (src_type, dst_type) not in roundtripable:
                continue
            restored = ak.cast(converted, src_type)
            self.assertTrue(
                (original == restored).all(),
                f"{src_type}: {original[:5]}, {dst_type}: {restored[:5]}")
def run_test_in1d(strings, cat, base_words):
    """Check ak.in1d on ``strings`` and ``cat`` against a brute-force search.

    A random selection of 100 entries from ``base_words`` forms the target
    set. The Strings and Categorical results must agree, every matched word
    must be in the target set, and the match mask must equal the mask built
    by comparing ``strings`` with each target word in turn.
    """
    picks = ak.randint(0, UNIQUE, 100)
    targets = base_words[picks]
    target_words = targets.to_ndarray()

    string_hits = ak.in1d(strings, targets)
    cat_hits = ak.in1d(cat, targets)
    assert (string_hits == cat_hits).all()

    # Every matched word must belong to the target set.
    for word in strings[string_hits].to_ndarray():
        assert word in target_words

    # Exhaustive search, to make sure no match was missed.
    expected_hits = ak.zeros(strings.size, dtype=ak.bool)
    for word in target_words:
        expected_hits |= (strings == word)
    assert (expected_hits == string_hits).all()
def test_error_handling(self):
    """Check error types and messages from concatenate, union1d and cos."""
    # Mixing float64 and bool arrays in concatenate.
    concat_message = ('Error: concatenateMsg: Incompatible arguments: '
                      'Expected float64 dtype but got bool dtype')
    with self.assertRaises(RuntimeError) as cm:
        ak.concatenate([ak.ones(100), ak.array([True])])
    self.assertEqual(concat_message, cm.exception.args[0])

    # Plain Python lists passed to union1d.
    with self.assertRaises(TypeError):
        ak.union1d([-1, 0, 1], [-2, 0, 2])

    # cos applied to a bool array.
    cos_message = 'Error: efuncMsg: cos bool not implemented'
    with self.assertRaises(RuntimeError) as cm:
        ak.cos(ak.randint(0, 1, 100, dtype=ak.bool))
    self.assertEqual(cos_message, cm.exception.args[0])
def test_multi_file(self):
    """Write one array across NUMFILES Parquet files and read it back whole.

    ``SIZE`` is rounded down to a multiple of ``NUMFILES`` so the array
    splits into equal slices; each slice is saved under ``pq_test<i>`` and
    the glob ``pq_test*`` is read back as a single array. Every file matching
    ``pq_test*`` is removed afterwards, even if a write, the read or the
    comparison fails, so a failing run does not leave stale files behind.
    """
    adjusted_size = int(SIZE / NUMFILES) * NUMFILES
    test_arrs = []
    elems = ak.randint(0, 2**32, adjusted_size)
    per_arr = int(adjusted_size / NUMFILES)
    try:
        for i in range(NUMFILES):
            start = i * per_arr
            test_arrs.append(elems[start:start + per_arr])
            test_arrs[i].save_parquet("pq_test" + str(i), "test-dset")

        pq_arr = ak.read_parquet("pq_test*", "test-dset")
        self.assertTrue((elems == pq_arr).all())
    finally:
        # Always clean up, regardless of the test outcome.
        for f in glob.glob('pq_test*'):
            os.remove(f)
def testErrorHandling(self):
    """ak.sort must reject bool arrays, plain lists and Strings."""
    # Sorting a bool pdarray raises RuntimeError with a fixed message.
    bool_values = ak.randint(0, 1, 1000, dtype=ak.bool)
    expected_message = 'Error: sortMsg: bool not implemented'
    with self.assertRaises(RuntimeError) as cm:
        ak.sort(bool_values)
    self.assertEqual(expected_message, cm.exception.args[0])

    # Sorting something that is not a pdarray raises TypeError.
    with self.assertRaises(TypeError):
        ak.sort(list(range(0, 10)))

    # Sorting a Strings object is unsupported and raises TypeError.
    with self.assertRaises(TypeError):
        ak.sort(ak.array(['String {}'.format(i) for i in range(0, 10)]))
def testIn1d(self):
    """ak.in1d on Categoricals matches the expected mask and rejects pdarrays."""
    residues_mod3 = [i % 3 for i in range(10)]
    residues_mod2 = [i % 2 for i in range(10)]

    strings_mod3 = ak.array(['String {}'.format(r) for r in residues_mod3])
    strings_mod2 = ak.array(['String {}'.format(r) for r in residues_mod2])
    cat_mod3 = ak.Categorical(strings_mod3)
    cat_mod2 = ak.Categorical(strings_mod2)

    # Only 'String 0' and 'String 1' occur in the second set.
    expected = ak.array([r < 2 for r in residues_mod3])
    for test_set in (cat_mod2, strings_mod2):
        self.assertTrue((expected == ak.in1d(cat_mod3, test_set)).all())

    # A numeric pdarray is not an accepted second argument.
    expected_message = ('type of argument "test" must be one of (Strings, Categorical); got '
                        'arkouda.pdarrayclass.pdarray instead')
    with self.assertRaises(TypeError) as cm:
        ak.in1d(cat_mod3, ak.randint(0, 5, 5))
    self.assertEqual(expected_message, cm.exception.args[0])
def setUp(self):
    """Create random string fixtures and pick a delimiter character.

    Two batches of random printable words are concatenated into
    ``base_words``; ``strings`` is a random selection from them, with NumPy
    and Categorical views. ``delim`` is one character sampled from the
    characters occurring in the base words, weighted by occurrence count.
    """
    ArkoudaTest.setUp(self)
    base_words1 = ak.random_strings_uniform(0, 10, UNIQUE,
                                            characters='printable')
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                              characters='printable')
    self.base_words = ak.concatenate((base_words1, base_words2))
    self.np_base_words = np.hstack(
        (base_words1.to_ndarray(), base_words2.to_ndarray()))
    choices = ak.randint(0, self.base_words.size, N)
    self.strings = self.base_words[choices]
    self.test_strings = self.strings.to_ndarray()
    self.cat = ak.Categorical(self.strings)
    # Join the NumPy copy of the words: str.join would otherwise have to
    # iterate the server-side Strings object directly.
    x, w = tuple(zip(*Counter(''.join(self.np_base_words)).items()))
    # x holds the distinct characters, w their counts (used as weights).
    self.delim = np.random.choice(x, p=(np.array(w) / sum(w)))
def testErrorHandling(self):
    """Check errors for iteration, histogram, is_sorted and any misuse."""
    # Iterating a pdarray raises NotImplementedError.
    with self.assertRaises(NotImplementedError):
        iter(ak.ones(100))

    # Iterating a Strings object raises NotImplementedError.
    with self.assertRaises(NotImplementedError):
        iter(ak.array(['String {}'.format(i) for i in range(0, 10)]))

    # ak.histogram on an unsupported (bool) dtype raises ValueError.
    with self.assertRaises(ValueError):
        ak.histogram((ak.randint(0, 1, 100, dtype=ak.bool)))

    # is_sorted on a bool array raises RuntimeError with a fixed message.
    expected_message = 'Error: reductionMsg: is_sorted bool not implemented'
    with self.assertRaises(RuntimeError) as cm:
        ak.concatenate([ak.array([True]), ak.array([True])]).is_sorted()
    self.assertEqual(expected_message, cm.exception.args[0])

    # any() called with an argument raises TypeError.
    with self.assertRaises(TypeError):
        ak.ones(100).any([0])
def test_error_handling(self):
    """ak.mink validates its array argument, the type of k and the value of k."""
    sample = ak.randint(0, 100, 100)

    # (expected exception, call that must raise it, expected first argument)
    bad_calls = (
        (TypeError,
         lambda: ak.mink(list(range(0, 10)), 1),
         'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead'),
        (TypeError,
         lambda: ak.mink(sample, '1'),
         'type of argument "k" must be int; got str instead'),
        (ValueError,
         lambda: ak.mink(sample, -1),
         "k must be 1 or greater"),
        (ValueError,
         lambda: ak.mink(ak.array([]), 1),
         "must be a non-empty pdarray of type int or float"),
    )
    for error_type, call, expected in bad_calls:
        with self.assertRaises(error_type) as cm:
            call()
        self.assertEqual(expected, cm.exception.args[0])
def setUp(self):
    """Create random string fixtures, a delimiter, and "gremlin" variants.

    Two batches of random printable words are concatenated into
    ``base_words``; ``strings`` is a random selection from them, with NumPy
    and Categorical views. ``delim`` is one character sampled from the
    characters occurring in the base words, weighted by occurrence count.
    The "gremlins" (a quote, a space and the empty string) are appended to
    produce the ``gremlins_*`` fixtures.

    The debug ``print`` calls that used to dump the fixtures before every
    test have been removed; they only added noise to the test output.
    """
    self.maxDiff = None
    ArkoudaTest.setUp(self)
    base_words1 = ak.random_strings_uniform(1, 10, UNIQUE,
                                            characters='printable')
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE,
                                              characters='printable')
    gremlins = ak.array(['"', ' ', ''])
    self.gremlins = gremlins
    self.base_words = ak.concatenate((base_words1, base_words2))
    self.np_base_words = np.hstack(
        (base_words1.to_ndarray(), base_words2.to_ndarray()))
    choices = ak.randint(0, self.base_words.size, N)
    self.strings = self.base_words[choices]
    self.test_strings = self.strings.to_ndarray()
    self.cat = ak.Categorical(self.strings)
    # x holds the distinct characters, w their counts (used as weights).
    x, w = tuple(zip(*Counter(''.join(self.base_words.to_ndarray())).items()))
    self.delim = np.random.choice(x, p=(np.array(w)/sum(w)))
    self.akset = set(ak.unique(self.strings).to_ndarray())
    # The gremlins are appended after the base words, so ``choices`` (drawn
    # below ``self.base_words.size``) still selects base words only.
    self.gremlins_base_words = base_words = ak.concatenate(
        (base_words1, base_words2, gremlins))
    self.gremlins_strings = ak.concatenate((base_words[choices], gremlins))
    self.gremlins_test_strings = self.gremlins_strings.to_ndarray()
    self.gremlins_cat = ak.Categorical(self.gremlins_strings)
def testUniform(self):
    """ak.uniform returns a float64 pdarray; bad randint arguments raise TypeError."""
    sample = ak.uniform(3)
    self.assertIsInstance(sample, ak.pdarray)
    self.assertEqual(ak.float64, sample.dtype)
    self.assertEqual([3], sample.shape)

    # Calling uniform with only ``low`` raises TypeError.
    with self.assertRaises(TypeError):
        ak.uniform(low=5)

    # Each randint parameter given as a string, with the expected message.
    bad_arguments = (
        (dict(low='0', high=5, size=100),
         "The low parameter must be an integer or float"),
        (dict(low=0, high='5', size=100),
         "The high parameter must be an integer or float"),
        (dict(low=0, high=5, size='100'),
         "The size parameter must be an integer"),
    )
    for kwargs, expected in bad_arguments:
        with self.assertRaises(TypeError) as cm:
            ak.randint(**kwargs)
        self.assertEqual(expected, cm.exception.args[0])