def test_str_repr(self):
    """
    Check ``__str__()`` and ``__repr__()`` for int, float and bool pdarrays.

    Each dtype is rendered twice with ``ak.client.pdarrayIterThresh`` set to 5:
    once with 3 elements (expected in full) and once with more elements
    (expected with the middle elided as ``...``).
    """
    ak.client.pdarrayIterThresh = 5
    try:
        # Test __str__()
        self.assertEqual("[1 2 3]", ak.array([1, 2, 3]).__str__())
        self.assertEqual("[1 2 3 ... 17 18 19]", ak.arange(1, 20).__str__())
        self.assertEqual("[1.100000e+00 2.300000e+00 5.000000e+00]",
                         ak.array([1.1, 2.3, 5]).__str__())
        self.assertEqual(
            "[0.000000e+00 5.263158e-01 1.052632e+00 ... 8.947368e+00 9.473684e+00 1.000000e+01]",
            ak.linspace(0, 10, 20).__str__())
        self.assertEqual("[False False False]",
                         ak.isnan(ak.array([1.1, 2.3, 5])).__str__())
        self.assertEqual("[False False False ... False False False]",
                         ak.isnan(ak.linspace(0, 10, 20)).__str__())

        # Test __repr__()
        self.assertEqual("array([1 2 3])", ak.array([1, 2, 3]).__repr__())
        self.assertEqual("array([1 2 3 ... 17 18 19])", ak.arange(1, 20).__repr__())
        self.assertEqual("array([1.1000000000000001 2.2999999999999998 5])",
                         ak.array([1.1, 2.3, 5]).__repr__())
        self.assertEqual(
            "array([0 0.52631578947368418 1.0526315789473684 ... 8.9473684210526319 9.473684210526315 10])",
            ak.linspace(0, 10, 20).__repr__())
        self.assertEqual("array([False False False])",
                         ak.isnan(ak.array([1.1, 2.3, 5])).__repr__())
        self.assertEqual("array([False False False ... False False False])",
                         ak.isnan(ak.linspace(0, 10, 20)).__repr__())
    finally:
        # Restore the default even when an assertion above fails, so the
        # lowered threshold cannot leak into other tests.
        ak.client.pdarrayIterThresh = ak.client.pdarrayIterThreshDefVal
def testConcatenate(self):
    """Concatenate pairs of arange, linspace and boolean pdarrays and compare element-wise."""
    ascending = ak.array([1, 2, 3, 4, 5, 6])
    # arange-built operands, joined in both orders
    low = ak.arange(1, 4)
    high = ak.arange(4, 7)
    joined = ak.concatenate([low, high])
    self.assertTrue((ascending == joined).all())
    swapped = ak.concatenate([high, low])
    self.assertTrue((ak.array([4, 5, 6, 1, 2, 3]) == swapped).all())

    # linspace-built operands, joined in both orders
    low = ak.linspace(start=1, stop=3, length=3)
    high = ak.linspace(start=4, stop=6, length=3)
    joined = ak.concatenate([low, high])
    self.assertTrue((ak.array([1, 2, 3, 4, 5, 6]) == joined).all())
    swapped = ak.concatenate([high, low])
    self.assertTrue((ak.array([4, 5, 6, 1, 2, 3]) == swapped).all())

    # boolean operands, one order only
    first_flags = ak.array([True, False, True])
    second_flags = ak.array([False, True, True])
    expected_flags = ak.array([True, False, True, False, True, True])
    self.assertTrue(
        (expected_flags == ak.concatenate([first_flags, second_flags])).all())
def testHdfUnsanitizedNames(self):
    """Save and then read back datasets whose names contain double quotes.

    No values are asserted; the test only requires that neither call raises.
    """
    quoted_datasets = {'foo"0"': ak.arange(100), 'bar"': ak.arange(100)}
    with tempfile.TemporaryDirectory(dir=IOTest.io_test_dir) as scratch_dir:
        file_prefix = f"{scratch_dir}/bad_dataset_names"
        ak.save_all(quoted_datasets, file_prefix)
        ak.read_all(f"{file_prefix}*")
def test_arange(self):
    """Check ``ak.arange`` against literal arrays for several start/stop/step triples."""
    cases = (
        ([0, 1, 2, 3, 4], (0, 5, 1)),
        ([5, 4, 3, 2, 1], (5, 0, -1)),
        ([-5, -6, -7, -8, -9], (-5, -10, -1)),
        ([0, 2, 4, 6, 8], (0, 10, 2)),
    )
    for expected_values, (start, stop, step) in cases:
        matches = ak.array(expected_values) == ak.arange(start, stop, step)
        self.assertTrue(matches.all())
def test_standalone_broadcast(self):
    """Check ``ak.broadcast`` with an explicit size and with a reversing permutation."""
    segment_starts = ak.arange(10) ** 2
    per_segment = ak.arange(10)
    total_size = 100
    expected_sum = ((2 * per_segment + 1) * per_segment).sum()

    by_size = ak.broadcast(segment_starts, per_segment, size=total_size)
    self.assertTrue(by_size.sum() == expected_sum)

    reversing = ak.arange(99, -1, -1)
    by_perm = ak.broadcast(segment_starts, per_segment, permutation=reversing)
    self.assertTrue(by_perm.sum() == expected_sum)
    # With the reversing permutation the output must be non-increasing.
    non_increasing = by_perm[:-1] >= by_perm[1:]
    self.assertTrue(non_increasing.all())
def test_compare_set_integer_iv(self):
    """Assign through an integer index vector in NumPy and in ak and compare the results.

    ``N`` is read from the enclosing module scope.
    """
    # create np version
    a = np.arange(N)
    iv = np.arange(N // 2)
    a[iv] = iv * 10
    # create ak version
    b = ak.arange(N)
    iv = ak.arange(N // 2)
    b[iv] = iv * 10
    # Compare element-wise: comparing the two ``.all()`` reductions would only
    # check that both arrays agree on whether every element is non-zero.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def check_get_integer_iv(self):
    """Gather through an integer index vector in NumPy and in ak and compare the results.

    ``N`` is read from the enclosing module scope.
    NOTE(review): the name lacks the ``test`` prefix, so default unittest
    discovery will not collect it -- confirm this is intended.
    """
    # create np version
    a = np.arange(N)
    iv = np.arange(N // 2)
    a = a[iv]
    # create ak version
    b = ak.arange(N)
    iv = ak.arange(N // 2)
    b = b[iv]
    # Compare element-wise: comparing the two ``.all()`` reductions would only
    # check that both arrays agree on whether every element is non-zero.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def test_flatten(self):
    """Split delimited strings with ``flatten`` and check the pieces and the returned segments."""
    source = ak.array(['one|two', 'three|four|five', 'six'])
    pieces, segments = source.flatten('|', return_segments=True)
    expected_pieces = ak.array(['one', 'two', 'three', 'four', 'five', 'six'])
    expected_segments = ak.array([0, 2, 5])
    self.assertTrue((pieces == expected_pieces).all())
    self.assertTrue((segments == expected_segments).all())

    # Build "0, 1, 2", "3, 4, 5", ... by sticking together three strided
    # ranges, then flatten back and expect 0..98 in order.
    strided = []
    for offset in range(3):
        strided.append(ak.cast(ak.arange(offset, 99, 3), 'str'))
    paired = strided[0].stick(strided[1], delimiter=', ')
    tripled = paired.stick(strided[2], delimiter=', ')
    recovered = tripled.flatten(', ')
    self.assertTrue((ak.cast(recovered, 'int64') == ak.arange(99)).all())
def testHash(self):
    """Check that ``ak.hash`` is consistent under reordering and returns int64 parts."""
    reversal = ak.arange(9, -1, -1)

    # Full hash: a pair of arrays; hashing the reversed input and indexing
    # with the reversal must reproduce the hash of the ascending input.
    fwd_first, fwd_second = ak.hash(ak.arange(10))
    rev_first, rev_second = ak.hash(reversal)
    self.assertTrue(
        (fwd_first == rev_first[reversal]).all()
        and (fwd_second == rev_second[reversal]).all()
    )

    # full=False: a single array, same reordering property.
    fwd_single = ak.hash(ak.arange(10), full=False)
    rev_single = ak.hash(reversal, full=False)
    self.assertTrue((fwd_single == rev_single[reversal]).all())

    # Hash of linspace input: both parts are int64.
    parts = ak.hash(ak.linspace(0, 10, 10))
    self.assertTrue((parts[0].dtype == ak.int64) and (parts[1].dtype == ak.int64))
def testAbs(self):
    """Check ``ak.abs`` against NumPy and literal arrays, and its TypeError for a list argument."""
    reference = np.linspace(1,10,10)
    mirrored = ak.array(reference)
    agrees_with_numpy = np.abs(reference) == ak.abs(mirrored).to_ndarray()
    self.assertTrue(agrees_with_numpy.all())

    from_negative_range = ak.abs(ak.arange(-5,-1))
    self.assertTrue((ak.arange(5,1,-1) == from_negative_range).all())

    from_negative_linspace = ak.abs(ak.linspace(-5,-1,5))
    self.assertTrue((ak.array([5,4,3,2,1]) == from_negative_linspace).all())

    # A plain Python list is rejected with a specific message.
    with self.assertRaises(TypeError) as caught:
        ak.abs([range(0,10)])
    message = caught.exception.args[0]
    self.assertEqual('type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
                     message)
def check_set_integer_iv(N):
    """Assign through an integer index vector in NumPy and in ak; report the element-wise match via ``pass_fail``."""
    half = N // 2

    # NumPy reference
    expected = np.arange(N)
    np_index = np.arange(half)
    expected[np_index] = np_index * 10

    # ak counterpart
    actual = ak.arange(N)
    ak_index = ak.arange(half)
    actual[ak_index] = ak_index * 10

    elementwise = expected == actual.to_ndarray()
    return pass_fail(elementwise.all())
def testCast(self):
    """Cast random arrays to every dtype in ``ak.DTypes`` and spot-check a few fixed casts.

    Every cast must preserve size; for the (source, target) pairs listed as
    round-trippable, casting back must reproduce the original values.
    """
    N = 100
    int_source = ak.randint(-(2**48), 2**48, N)
    float_source = ak.randint(0, 1, N, dtype=ak.float64)
    bool_source = ak.randint(0, 2, N, dtype=ak.bool)
    sources = {
        ak.int64: int_source,
        ak.float64: float_source,
        ak.bool: bool_source,
    }
    round_trip_pairs = {
        (ak.bool, ak.bool),
        (ak.int64, ak.int64),
        (ak.int64, ak.float64),
        (ak.int64, npstr),
        (ak.float64, ak.float64),
        (ak.float64, npstr),
        (ak.uint8, ak.int64),
        (ak.uint8, ak.float64),
        (ak.uint8, npstr),
    }

    for src_type, original in sources.items():
        for candidate in ak.DTypes:
            dst_type = ak.dtype(candidate)
            converted = ak.cast(original, dst_type)
            self.assertEqual(original.size, converted.size)
            if (src_type, dst_type) not in round_trip_pairs:
                continue
            restored = ak.cast(converted, src_type)
            self.assertTrue((original == restored).all(),
                            f"{src_type}: {original[:5]}, {dst_type}: {restored[:5]}")

    # Fixed spot checks: float -> int64, int -> float64 dtype, float -> bool.
    as_ints = ak.cast(ak.linspace(1, 5, 5), dt=ak.int64)
    self.assertTrue((ak.array([1, 2, 3, 4, 5]) == as_ints).all())
    as_floats = ak.cast(ak.arange(0, 5), dt=ak.float64)
    self.assertEqual(as_floats.dtype, ak.float64)
    as_bools = ak.cast(ak.linspace(0, 4, 5), dt=ak.bool)
    self.assertTrue((ak.array([False, True, True, True, True]) == as_bools).all())
def test_multi_level_categorical(self):
    """Group by two keys (Strings and/or Categorical) and check ``nunique`` per key pair."""
    strings = ak.array(['a', 'b', 'a', 'b', 'c'])
    categorical = ak.Categorical(strings)
    coded = ak.Categorical.from_codes(
        codes=ak.array([0, 1, 0, 1, 2]),
        categories=ak.array(['a', 'b', 'c']))
    positions = ak.arange(strings.size)
    expected = {('a', 'a'): 2, ('b', 'b'): 2, ('c', 'c'): 1}

    key_lists = (
        [strings, strings],       # two Strings
        [categorical, coded],     # two Categoricals, one built via from_codes
        [coded, strings],         # one from_codes Categorical and one Strings
    )
    for keys in key_lists:
        labels, counts = ak.GroupBy(keys).nunique(positions)
        self.assertDictEqual(expected, to_tuple_dict(labels, counts))
def run_test_pdarray_index(strings, test_strings, cat):
    """Index ``strings`` and ``cat`` with integer and boolean pdarrays and compare with ``test_strings``.

    ``N`` is read from the enclosing module scope and used to pick the
    single-element index.
    """
    # Every tenth position, as an integer index vector.
    picks = ak.arange(0, strings.size, 10)
    np_picks = picks.to_ndarray()
    assert compare_strings(strings[picks].to_ndarray(), test_strings[np_picks])
    assert compare_strings(cat[picks].to_ndarray(), test_strings[np_picks])

    # The same positions expressed as a boolean mask.
    mask = ak.zeros(strings.size, dtype=ak.bool)
    mask[picks] = True
    assert compare_strings(strings[mask].to_ndarray(), test_strings[mask.to_ndarray()])

    # Indexing with a one-element pdarray (int) should return Strings array, not string scalar
    middle = N // 2
    one_index = ak.array([middle])
    selected = strings[one_index]
    assert isinstance(selected, ak.Strings) and (selected.size == 1)
    assert selected[0] == strings[middle]

    # Logical indexing with all-False array should return empty Strings array
    sparse_mask = ak.zeros(strings.size, dtype=ak.bool)
    selected = strings[sparse_mask]
    assert isinstance(selected, ak.Strings) and (selected.size == 0)

    # Logical indexing with a single True should return one-element Strings array, not string scalar
    sparse_mask[middle] = True
    selected = strings[sparse_mask]
    assert isinstance(selected, ak.Strings) and (selected.size == 1)
    assert selected[0] == strings[middle]
def check_set_bool_iv(self):
    """Assign an array through a boolean mask in NumPy and in ak and compare the results.

    ``N`` is read from the enclosing module scope.
    NOTE(review): the name lacks the ``test`` prefix, so default unittest
    discovery will not collect it -- confirm this is intended.
    """
    # create np version
    a = np.arange(N)
    a[a < N // 2] = a[:N // 2] * -1
    # create ak version
    b = ak.arange(N)
    b[b < N // 2] = b[:N // 2] * -1
    # Compare element-wise: comparing the two ``.all()`` reductions would only
    # check that both arrays agree on whether every element is non-zero.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def test_compare_get_bool_iv(self):
    """Select through a boolean mask in NumPy and in ak and compare the results.

    ``N`` is read from the enclosing module scope.
    """
    # create np version
    a = np.arange(N)
    a = a[a < N // 2]
    # create ak version
    b = ak.arange(N)
    b = b[b < N // 2]
    # Compare element-wise (shape included): comparing the two ``.all()``
    # reductions would only check that both arrays agree on whether every
    # element is non-zero.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def test_compare_set_bool_iv_value(self):
    """Assign a scalar through a boolean mask in NumPy and in ak and compare the results.

    ``N`` is read from the enclosing module scope.
    """
    # create np version
    a = np.arange(N)
    a[a < N // 2] = -1
    # create ak version
    b = ak.arange(N)
    b[b < N // 2] = -1
    # Compare element-wise: comparing the two ``.all()`` reductions would only
    # check that both arrays agree on whether every element is non-zero.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def check_get_integer_idx(N):
    """Compare scalar indexing (middle and last element) between NumPy and ak.

    Returns whatever ``pass_fail`` returns for the combined outcome.
    """
    # create np version
    a = np.arange(N)
    v1 = a[N // 2]
    # create ak version
    b = ak.arange(N)
    v2 = b[N // 2]
    # Combine both comparisons *before* handing them to pass_fail. Chaining
    # ``pass_fail(x) and pass_fail(y)`` depends on the truthiness of
    # pass_fail's return value; if that is always truthy (e.g. a status
    # string -- TODO confirm), a failing first check would be masked.
    both_match = bool(v1 == v2) and bool(a[-1] == b[-1])
    return pass_fail(both_match)
def test_compare_get_integer_idx(self):
    """Compare scalar indexing (middle and last element) between NumPy and ak.

    ``N`` is read from the enclosing module scope.
    """
    middle = N // 2
    reference = np.arange(N)
    candidate = ak.arange(N)
    self.assertEqual(reference[middle], candidate[middle])
    self.assertEqual(reference[-1], candidate[-1])
def setUp(self):
    """Run the base ``ArkoudaTest`` setup, then build the fixture arrays and constants."""
    ArkoudaTest.setUp(self)
    length = 1000
    all_ones = ak.ones(length, dtype=np.int64)

    self.N = length
    self.a1 = all_ones
    self.a2 = ak.arange(0, length, 1)
    self.t1 = all_ones          # same object as a1
    self.t2 = all_ones * 10
    self.dt = 10
    ak.verbose = False
def check_arange(N):
    """Compare ``np.arange(N)`` with ``ak.arange(N)`` element-wise; report via ``pass_fail``."""
    expected = np.arange(N)
    actual = ak.arange(N)
    elementwise = expected == actual.to_ndarray()
    return pass_fail(elementwise.all())
def test_compare_sort(self):
    """Sort a reversed range in NumPy and in ak and compare the results.

    ``N`` is read from the enclosing module scope.
    """
    # create np version
    a = np.arange(N)
    a = a[::-1]
    a = np.sort(a)
    # create ak version
    b = ak.arange(N)
    b = b[::-1]
    b = ak.sort(b)
    # Compare element-wise: comparing the two ``.all()`` reductions would only
    # check that both arrays agree on whether every element is non-zero.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def check_set_bool_iv(N):
    """Assign an array through a boolean mask in NumPy and in ak; report the element-wise match via ``pass_fail``."""
    half = N // 2

    # NumPy reference: negate the first half in place.
    expected = np.arange(N)
    expected[expected < half] = expected[:half] * -1

    # ak counterpart
    actual = ak.arange(N)
    actual[actual < half] = actual[:half] * -1

    elementwise = expected == actual.to_ndarray()
    return pass_fail(elementwise.all())
def test_compare_argsort(self):
    """Argsort a reversed range in NumPy and in ak, apply the permutations, and compare.

    ``N`` is read from the enclosing module scope.
    """
    # create np version
    a = np.arange(N)
    a = a[::-1]
    iv = np.argsort(a)
    a = a[iv]
    # create ak version
    b = ak.arange(N)
    b = b[::-1]
    iv = ak.argsort(b)
    b = b[iv]
    # Without this assertion the test would pass regardless of what
    # ak.argsort returned.
    self.assertTrue(np.array_equal(a, b.to_ndarray()))
def check_bool(N):
    """Check truth-value behaviour of pdarrays and report via ``pass_fail``.

    The check succeeds when using an ``ak.arange(N)`` array in a boolean
    context raises ``ValueError`` and a one-element array ``[1]`` is truthy.
    """
    a = ak.arange(N)
    b = ak.ones(N)
    try:
        # Evaluating ``a and b`` forces bool(a); the value itself is unused.
        a and b
    except ValueError:
        correct = True
    except Exception:
        # Any other error is the wrong failure mode.
        correct = False
    else:
        # No exception at all: ValueError was expected, so the check fails
        # (and ``correct`` is always bound before it is read below).
        correct = False
    d = ak.array([1])
    correct = correct and (d and 5)
    return pass_fail(correct)
def test_aggregate_strings(self):
    """Group by a Strings array and check ``nunique`` of the positions per label."""
    words = ak.array(['a', 'b', 'a', 'b', 'c'])
    positions = ak.arange(words.size)
    labels, counts = ak.GroupBy(words).nunique(positions)
    actual = dict(zip(labels.to_ndarray(), counts.to_ndarray()))
    self.assertDictEqual({'a': 2, 'b': 2, 'c': 1}, actual)
def check_sort(N):
    """Sort a reversed range in NumPy and in ak; report the element-wise match via ``pass_fail``."""
    # NumPy reference
    descending_np = np.arange(N)[::-1]
    expected = np.sort(descending_np)

    # ak counterpart
    descending_ak = ak.arange(N)[::-1]
    actual = ak.sort(descending_ak)

    elementwise = expected == actual.to_ndarray()
    return pass_fail(elementwise.all())
def testBaseCategorical(self):
    """Check codes, segments, categories, size and objtype of the fixture Categorical,
    and that constructing one from a non-Strings pdarray raises ValueError."""
    cat = self._getCategorical()

    expected_codes = ak.array([7,5,9,8,2,1,4,0,3,6])
    self.assertTrue((expected_codes == cat.codes).all())

    expected_segments = ak.array([0,1,2,3,4,5,6,7,8,9])
    self.assertTrue((expected_segments == cat.segments).all())

    expected_categories = ak.array(['string 8', 'string 6', 'string 5', 'string 9',
                                    'string 7', 'string 2', 'string 10', 'string 1',
                                    'string 4', 'string 3'])
    self.assertTrue((expected_categories == cat.categories).all())

    self.assertEqual(10,cat.size)
    self.assertEqual('category',cat.objtype)

    with self.assertRaises(ValueError) as caught:
        ak.Categorical(ak.arange(0,5,10))
    message = caught.exception.args[0]
    self.assertEqual('Categorical: inputs other than Strings not yet supported',
                     message)
def check_coargsort(N):
    """Compare ``np.lexsort`` with ``ak.coargsort`` on a reversed range used as both keys;
    report the element-wise match via ``pass_fail``."""
    # NumPy reference
    descending_np = np.arange(N)[::-1]
    np_order = np.lexsort([descending_np, descending_np])
    expected = descending_np[np_order]

    # ak counterpart
    descending_ak = ak.arange(N)[::-1]
    ak_order = ak.coargsort([descending_ak, descending_ak])
    actual = descending_ak[ak_order]

    elementwise = expected == actual.to_ndarray()
    return pass_fail(elementwise.all())
def testStrictTypes(self):
    """Write HDF5 files with differing int/float dtypes and read them with and without strict typing.

    A default ``read_all`` over the files must raise RuntimeError mentioning
    inconsistent precision or sign; with ``strictTypes=False`` the data must
    come back as consecutive ranges.
    """
    N = 100
    prefix = f'{IOTest.io_test_dir}/strict-type-test'
    inttypes = [np.uint32, np.int64, np.uint16, np.int16]
    floattypes = [np.float32, np.float64, np.float32, np.float64]

    # One file per dtype pair, each holding the i-th block of N consecutive values.
    for index, (int_type, float_type) in enumerate(zip(inttypes, floattypes)):
        start, stop = index*N, (index+1)*N
        with h5py.File(f'{prefix}-{index}', 'w') as handle:
            handle.create_dataset('integers', data=np.arange(start, stop, dtype=int_type))
            handle.create_dataset('floats', data=np.arange(start, stop, dtype=float_type))

    pattern = prefix+'*'
    with self.assertRaises(RuntimeError) as caught:
        ak.read_all(pattern)
    self.assertTrue('Inconsistent precision or sign' in caught.exception.args[0])

    loaded = ak.read_all(pattern, strictTypes=False)
    expected_ints = ak.arange(len(inttypes)*N)
    self.assertTrue((loaded['integers'] == expected_ints).all())
    expected_floats = np.arange(len(floattypes)*N, dtype=np.float64)
    self.assertTrue(np.allclose(loaded['floats'].to_ndarray(), expected_floats))