def testConcatenate(self):
    """
    Check ak.concatenate in both argument orders for arange-generated,
    linspace-generated, and boolean pdarrays.
    """
    def verify(expected, pieces):
        # Build the expected pdarray first, then concatenate the pieces and
        # require every element-wise comparison to be True.
        wanted = ak.array(expected)
        self.assertTrue((wanted == ak.concatenate(pieces)).all())

    # Inputs produced by arange
    head, tail = ak.arange(1, 4), ak.arange(4, 7)
    verify([1, 2, 3, 4, 5, 6], [head, tail])
    verify([4, 5, 6, 1, 2, 3], [tail, head])

    # Inputs produced by linspace
    head = ak.linspace(start=1, stop=3, length=3)
    tail = ak.linspace(start=4, stop=6, length=3)
    verify([1, 2, 3, 4, 5, 6], [head, tail])
    verify([4, 5, 6, 1, 2, 3], [tail, head])

    # Boolean inputs (only the head-then-tail order is checked)
    head = ak.array([True, False, True])
    tail = ak.array([False, True, True])
    verify([True, False, True, False, True, True], [head, tail])
def test_str_repr(self):
    """
    Test 3 different types: int, float, bool with lengths under/over threshold
    Do this for both __str__() and __repr__()

    ak.client.pdarrayIterThresh is lowered to 5 for the duration of the
    test and is always restored to ak.client.pdarrayIterThreshDefVal
    afterwards, including when one of the assertions fails, so the lowered
    threshold cannot leak into other tests.
    """
    ak.client.pdarrayIterThresh = 5
    try:
        # Test __str__()
        self.assertEqual("[1 2 3]", ak.array([1, 2, 3]).__str__())
        self.assertEqual("[1 2 3 ... 17 18 19]", ak.arange(1, 20).__str__())
        self.assertEqual("[1.100000e+00 2.300000e+00 5.000000e+00]",
                         ak.array([1.1, 2.3, 5]).__str__())
        self.assertEqual(
            "[0.000000e+00 5.263158e-01 1.052632e+00 ... 8.947368e+00 9.473684e+00 1.000000e+01]",
            ak.linspace(0, 10, 20).__str__())
        self.assertEqual("[False False False]",
                         ak.isnan(ak.array([1.1, 2.3, 5])).__str__())
        self.assertEqual("[False False False ... False False False]",
                         ak.isnan(ak.linspace(0, 10, 20)).__str__())

        # Test __repr__()
        self.assertEqual("array([1 2 3])", ak.array([1, 2, 3]).__repr__())
        self.assertEqual("array([1 2 3 ... 17 18 19])",
                         ak.arange(1, 20).__repr__())
        self.assertEqual("array([1.1000000000000001 2.2999999999999998 5])",
                         ak.array([1.1, 2.3, 5]).__repr__())
        self.assertEqual(
            "array([0 0.52631578947368418 1.0526315789473684 ... 8.9473684210526319 9.473684210526315 10])",
            ak.linspace(0, 10, 20).__repr__())
        self.assertEqual("array([False False False])",
                         ak.isnan(ak.array([1.1, 2.3, 5])).__repr__())
        self.assertEqual("array([False False False ... False False False])",
                         ak.isnan(ak.linspace(0, 10, 20)).__repr__())
    finally:
        # Don't forget to set this back for other tests; doing it in a
        # finally clause guarantees the reset even on assertion failure.
        ak.client.pdarrayIterThresh = ak.client.pdarrayIterThreshDefVal
def testCast(self):
    """
    Cast sample int64, float64, and bool arrays to every dtype in
    ak.DTypes, checking that size is preserved and that the listed
    (source, target) pairs survive a cast there and back unchanged.
    Finishes with three fixed-value cast checks.
    """
    N = 100
    samples = {
        ak.int64: ak.randint(-(2**48), 2**48, N),
        ak.float64: ak.randint(0, 1, N, dtype=ak.float64),
        ak.bool: ak.randint(0, 2, N, dtype=ak.bool),
    }
    # (source dtype, target dtype) pairs expected to round-trip exactly.
    roundtripable = {
        (ak.bool, ak.bool),
        (ak.int64, ak.int64),
        (ak.int64, ak.float64),
        (ak.int64, npstr),
        (ak.float64, ak.float64),
        (ak.float64, npstr),
        (ak.uint8, ak.int64),
        (ak.uint8, ak.float64),
        (ak.uint8, npstr),
    }

    for t1, orig in samples.items():
        # map() is lazy, so each dtype is normalized right before it is used.
        for t2 in map(ak.dtype, ak.DTypes):
            converted = ak.cast(orig, t2)
            self.assertEqual(orig.size, converted.size)
            if (t1, t2) not in roundtripable:
                continue
            roundtrip = ak.cast(converted, t1)
            self.assertTrue((orig == roundtrip).all(),
                            f"{t1}: {orig[:5]}, {t2}: {roundtrip[:5]}")

    # Fixed-value checks: float -> int64, int -> float64 dtype, float -> bool
    self.assertTrue(
        (ak.array([1, 2, 3, 4, 5]) == ak.cast(ak.linspace(1, 5, 5), dt=ak.int64)).all())
    self.assertEqual(ak.cast(ak.arange(0, 5), dt=ak.float64).dtype, ak.float64)
    self.assertTrue(
        (ak.array([False, True, True, True, True])
         == ak.cast(ak.linspace(0, 4, 5), dt=ak.bool)).all())
def test_linspace(self):
    """
    Exercise ak.linspace: result length/dtype/type, float arguments,
    descending ranges, and the TypeError messages raised for string
    arguments.
    """
    spaced = ak.linspace(0, 100, 1000)
    self.assertEqual(1000, len(spaced))
    self.assertEqual(float, spaced.dtype)
    self.assertIsInstance(spaced, ak.pdarray)

    # Float endpoints: only checked for not raising.
    spaced = ak.linspace(0.0, 100.0, 150)

    # Descending ranges with int and then float endpoints.
    for first, last in ((5, 0), (5.0, 0.0)):
        spaced = ak.linspace(start=first, stop=last, length=6)
        self.assertEqual(5.0000, spaced[0])
        self.assertEqual(0.0000, spaced[5])

    # Each entry: positional arguments that must be rejected, and the
    # exact message expected in the TypeError.
    rejected = (
        ((0, '100', 1000),
         ('type of argument "stop" must be one of (float, int); got str instead')),
        (('0', 100, 1000),
         ('type of argument "start" must be one of (float, int); got str instead')),
        ((0, 100, '1000'),
         'type of argument "length" must be int; got str instead'),
    )
    for arguments, message in rejected:
        with self.assertRaises(TypeError) as caught:
            ak.linspace(*arguments)
        self.assertEqual(message, caught.exception.args[0])
def testLinspace(self):
    """
    Exercise ak.linspace: result length/dtype/type, a descending range,
    and the TypeError messages raised for string arguments.
    """
    spaced = ak.linspace(0, 100, 1000)
    self.assertEqual(1000, len(spaced))
    self.assertEqual(float, spaced.dtype)
    self.assertIsInstance(spaced, ak.pdarray)

    spaced = ak.linspace(start=5, stop=0, length=6)
    self.assertEqual(5.0000, spaced[0])
    self.assertEqual(0.0000, spaced[5])

    # Each entry: positional arguments that must be rejected, and the
    # exact message expected in the TypeError.
    rejected = (
        ((0, '100', 1000),
         ("The stop parameter must be an int or a" +
          " scalar that can be parsed to an int, but is a 'str'")),
        (('0', 100, 1000),
         ("The start parameter must be an int or a" +
          " scalar that can be parsed to an int, but is a 'str'")),
        ((0, 100, '1000'),
         "The length parameter must be an int64"),
    )
    for arguments, message in rejected:
        with self.assertRaises(TypeError) as caught:
            ak.linspace(*arguments)
        self.assertEqual(message, caught.exception.args[0])
def compare_strategies(length, ncat, op, dtype):
    """
    Time ak.GroupBy construction and aggregation with the second GroupBy
    argument set to False (reported as "Global") and True (reported as
    "Local"), printing each timing and whether the two results agree.

    Parameters
    ----------
    length : number of keys/values to generate
    ncat : exclusive upper bound for the random integer keys
    op : aggregation operator passed to GroupBy.aggregate
    dtype : 'int64', 'bool', or anything else (values come from linspace)

    Returns
    -------
    tuple of (global groupby time, global reduce time,
              local groupby time, local reduce time)
    """
    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length//ncat, length)
    elif dtype == 'bool':
        vals = ak.zeros(length, dtype='bool')
        # Flip ncat//2 randomly drawn positions to True.
        for idx in np.random.randint(0, length, ncat//2):
            vals[idx] = True
    else:
        vals = ak.linspace(-1, 1, length)

    def clocked(label, action):
        # Print the label, run the action, then print and return how long
        # the action took alongside its result.
        print(label, end=' ')
        began = time()
        outcome = action()
        elapsed = time() - began
        print(elapsed)
        return outcome, elapsed

    gg, ggtime = clocked("Global groupby", lambda: ak.GroupBy(keys, False))
    (gk, gv), grtime = clocked("Global reduce", lambda: gg.aggregate(vals, op))
    lg, lgtime = clocked("Local groupby", lambda: ak.GroupBy(keys, True))
    (lk, lv), lrtime = clocked("Local reduce", lambda: lg.aggregate(vals, op))

    print(f"Keys match? {(gk == lk).all()}")
    print(f"Absolute diff of vals = {ak.abs(gv - lv).sum()}")
    return ggtime, grtime, lgtime, lrtime
def test_compare_linspace(self):
    """Check that ak.linspace(10, 20, N) is numerically close to np.linspace."""
    # NumPy reference first, then the arkouda array under test.
    reference = np.linspace(10, 20, N)
    candidate = ak.linspace(10, 20, N)
    agrees = np.allclose(reference, candidate.to_ndarray())
    self.assertTrue(agrees)
def check_linspace(N):
    """
    Compare ak.linspace(10, 20, N) against np.linspace(10, 20, N) with
    np.allclose and return the result of pass_fail on that comparison.
    """
    # NumPy reference first, then the arkouda array under test.
    reference = np.linspace(10, 20, N)
    candidate = ak.linspace(10, 20, N)
    agrees = np.allclose(reference, candidate.to_ndarray())
    return pass_fail(agrees)
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """
    Build a random key array and a matching value array.

    Parameters
    ----------
    length : number of elements in both arrays
    nkeys : exclusive upper bound for the random integer keys
    nvals : exclusive upper bound for the values when dtype is 'int64'
    dtype : 'int64' (random ints), 'bool' (zeros with nkeys // 2 randomly
        drawn positions set to True), or anything else (linspace from
        -1 to 1)

    Returns
    -------
    tuple of (keys, vals)
    """
    keys = ak.randint(0, nkeys, length)

    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)

    if dtype == 'bool':
        flags = ak.zeros(length, dtype='bool')
        for idx in np.random.randint(0, length, nkeys // 2):
            flags[idx] = True
        return keys, flags

    return keys, ak.linspace(-1, 1, length)
def testCumSum(self):
    """
    Check the type, length, and dtype of ak.cumsum on a linspace array,
    and the TypeError message raised for a list argument.
    """
    source = ak.linspace(1,10,10)
    running = ak.cumsum(source)

    self.assertIsInstance(running, ak.pdarray)
    self.assertEqual(10, len(running))
    self.assertEqual(float, running.dtype)

    # A plain list is not a pdarray and must be rejected.
    with self.assertRaises(TypeError) as caught:
        ak.cumsum([range(0,10)])
    self.assertEqual(
        'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
        caught.exception.args[0])
def testHash(self):
    """
    Check that ak.hash is position-independent (hashing a reversed array
    yields the reversed hashes) for both the default two-part hash and
    full=False, and that hashing a linspace array yields two int64 arrays.
    """
    fwd_a, fwd_b = ak.hash(ak.arange(10))
    rev = ak.arange(9, -1, -1)
    bwd_a, bwd_b = ak.hash(rev)
    # Indexing the reversed hashes by rev puts them back in forward order.
    self.assertTrue((fwd_a == bwd_a[rev]).all())
    self.assertTrue((fwd_b == bwd_b[rev]).all())

    fwd_a = ak.hash(ak.arange(10), full=False)
    bwd_a = ak.hash(rev, full=False)
    self.assertTrue((fwd_a == bwd_a[rev]).all())

    halves = ak.hash(ak.linspace(0, 10, 10))
    self.assertTrue((halves[0].dtype == ak.int64) and (halves[1].dtype == ak.int64))
def testAbs(self):
    """
    Check ak.abs against np.abs and against hand-written expectations,
    and the TypeError message raised for a list argument.
    """
    reference = np.linspace(1,10,10)
    mirrored = ak.array(reference)
    matches_numpy = (np.abs(reference) == ak.abs(mirrored).to_ndarray()).all()
    self.assertTrue(matches_numpy)

    # Negative arange and linspace inputs.
    self.assertTrue(
        (ak.arange(5,1,-1) == ak.abs(ak.arange(-5,-1))).all())
    self.assertTrue(
        (ak.array([5,4,3,2,1]) == ak.abs(ak.linspace(-5,-1,5))).all())

    # A plain list is not a pdarray and must be rejected.
    with self.assertRaises(TypeError) as caught:
        ak.abs([range(0,10)])
    self.assertEqual(
        'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
        caught.exception.args[0])
def testValueCounts(self):
    """
    Check ak.value_counts on an all-ones int64 array, the RuntimeError
    raised for a linspace (float) array, and the TypeError raised for a
    list argument.
    """
    values = ak.ones(100, dtype=ak.int64)
    counted = ak.value_counts(values)
    self.assertEqual(ak.array([1]), counted[0])
    self.assertEqual(ak.array([100]), counted[1])

    values = ak.linspace(1, 10, 10)
    with self.assertRaises(RuntimeError) as caught:
        ak.value_counts(values)
    self.assertEqual('Error: unique: float64 not implemented',
                     caught.exception.args[0])

    with self.assertRaises(TypeError) as caught:
        ak.value_counts([0])
    self.assertEqual(
        'type of argument "pda" must be arkouda.pdarrayclass.pdarray; got list instead',
        caught.exception.args[0])
def test_error_handling(self):
    """
    Check the TypeError message ak.where raises when a list is passed in
    each of its three argument positions.
    """
    def ramp():
        # Fresh valid pdarray argument; called inside each `with` block.
        return ak.linspace(1, 10, 10)

    # List in the first (condition) position
    with self.assertRaises(TypeError) as caught:
        ak.where([0], ramp(), ramp())
    self.assertEqual(
        ('type of argument "condition" must be arkouda.pdarrayclass.pdarray;' +
         ' got list instead'),
        caught.exception.args[0])

    # List in the second (A) position
    with self.assertRaises(TypeError) as caught:
        ak.where(ramp(), [0], ramp())
    self.assertEqual(
        ('type of argument "A" must be one of (int, float, int64, pdarray); got list instead'),
        caught.exception.args[0])

    # List in the third position
    with self.assertRaises(TypeError) as caught:
        ak.where(ramp(), ramp(), [0])
    self.assertEqual(
        'both A and B must be an int, np.int64, float, np.float64, or pdarray',
        caught.exception.args[0])
def test_linspace(self):
    """
    Exercise ak.linspace: result length/dtype/type, float arguments,
    descending ranges with Python and NumPy scalar arguments, and the
    TypeError messages raised for string arguments.
    """
    pda = ak.linspace(0, 100, 1000)
    self.assertEqual(1000, len(pda))
    self.assertEqual(float, pda.dtype)
    self.assertIsInstance(pda, ak.pdarray)

    # Float endpoints: only checked for not raising.
    pda = ak.linspace(0.0, 100.0, 150)

    pda = ak.linspace(start=5, stop=0, length=6)
    self.assertEqual(5.0000, pda[0])
    self.assertEqual(0.0000, pda[5])

    pda = ak.linspace(start=5.0, stop=0.0, length=6)
    self.assertEqual(5.0000, pda[0])
    self.assertEqual(0.0000, pda[5])

    # NumPy scalar arguments. np.float64 is used instead of the np.float
    # alias, which was deprecated in NumPy 1.20 and removed in 1.24 (it
    # raises AttributeError there).
    pda = ak.linspace(start=np.float64(5.0), stop=np.float64(0.0),
                      length=np.int64(6))
    self.assertEqual(5.0000, pda[0])
    self.assertEqual(0.0000, pda[5])

    with self.assertRaises(TypeError) as cm:
        ak.linspace(0, '100', 1000)
    self.assertEqual(
        ('both start and stop must be an int, np.int64, float, or np.float64'),
        cm.exception.args[0])

    with self.assertRaises(TypeError) as cm:
        ak.linspace('0', 100, 1000)
    self.assertEqual(
        ('both start and stop must be an int, np.int64, float, or np.float64'),
        cm.exception.args[0])

    with self.assertRaises(TypeError) as cm:
        ak.linspace(0, 100, '1000')
    self.assertEqual(
        'type of argument "length" must be one of (int, int64); got str instead',
        cm.exception.args[0])
def run_tests(verbose):
    """
    Apply every operator in ak.pdarray.BinOps to every combination of
    int64/float64/bool operands (array-array, array-scalar, scalar-array)
    in both NumPy and arkouda, classify each outcome, and print a summary.

    Side effects: sets np.seterr(all='ignore') and (re)binds the
    module-level globals ``pdarrays``, ``ndarrays`` and ``scalars``, which
    the evaluated expressions look up by name.

    Parameters
    ----------
    verbose : when truthy, also print the operand tables and the
        individual expressions behind each summary count.

    Returns
    -------
    bool : True when every operation executed by both libraries produced
        matching results (no execution error, dtype mismatch, or value
        mismatch was recorded).
    """
    # ignore numpy warnings like divide by 0
    np.seterr(all='ignore')
    # These tables are module-level because do_op evaluates expression
    # strings that refer to them by name.
    global pdarrays
    pdarrays = {
        'int64': ak.arange(0, SIZE, 1),
        'float64': ak.linspace(0, 2, SIZE),
        'bool': (ak.arange(0, SIZE, 1) % 2) == 0
    }
    global ndarrays
    ndarrays = {
        'int64': np.arange(0, SIZE, 1),
        'float64': np.linspace(0, 2, SIZE),
        'bool': (np.arange(0, SIZE, 1) % 2) == 0
    }
    global scalars
    scalars = {'int64': 5, 'float64': 3.14159, 'bool': True}
    dtypes = pdarrays.keys()
    if verbose:
        print("Operators: ", ak.pdarray.BinOps)
        print("Dtypes: ", dtypes)
        print("pdarrays: ")
        for k, v in pdarrays.items():
            print(k, ": ", v)
        print("ndarrays: ")
        for k, v in ndarrays.items():
            print(k, ": ", v)
        print("scalars: ")
        for k, v in scalars.items():
            print(k, ": ", v)

    def do_op(lt, rt, ls, rs, isarkouda, oper):
        # Build "<left> <oper> <right>" where each side indexes the scalar
        # table (when ls/rs is set) or the ndarrays/pdarrays table, then
        # evaluate it. The pieces come only from the fixed tables above and
        # ak.pdarray.BinOps, never from external input.
        evalstr = ''
        if ls:
            evalstr += 'scalars["{}"]'.format(lt)
        else:
            evalstr += '{}["{}"]'.format(('ndarrays', 'pdarrays')[isarkouda], lt)
        evalstr += ' {} '.format(oper)
        if rs:
            evalstr += 'scalars["{}"]'.format(rt)
        else:
            evalstr += '{}["{}"]'.format(('ndarrays', 'pdarrays')[isarkouda], rt)
        res = eval(evalstr)
        return res

    results = {
        'neither_implement': [],    # (expression, ak_error)
        'arkouda_minus_numpy': [],  # (expression, ak_result, error_on_exec?)
        'numpy_minus_arkouda': [],  # (expression, ak_result, error_on_exec?)
        'both_implement': []        # (expression, ak_result, error_on_exec?, dtype_mismatch?, value_mismatch?)
    }
    for ltype, rtype, op in product(dtypes, dtypes, ak.pdarray.BinOps):
        for lscalar, rscalar in ((False, False), (False, True), (True, False)):
            expression = "{}({}) {} {}({})".format(ltype,
                                                   ('array', 'scalar')[lscalar],
                                                   op, rtype,
                                                   ('array', 'scalar')[rscalar])
            try:
                npres = do_op(ltype, rtype, lscalar, rscalar, False, op)
            except TypeError:  # numpy doesn't implement operation
                try:
                    akres = do_op(ltype, rtype, lscalar, rscalar, True, op)
                except RuntimeError as e:
                    if 'not implemented' in str(e):
                        # neither numpy nor arkouda implement
                        results['neither_implement'].append(
                            (expression, str(e)))
                    else:
                        # arkouda implements with error, np does not implement
                        results['arkouda_minus_numpy'].append(
                            (expression, str(e), True))
                    continue
                # arkouda implements but not numpy
                results['arkouda_minus_numpy'].append(
                    (expression, str(akres), False))
                continue
            try:
                akres = do_op(ltype, rtype, lscalar, rscalar, True, op)
            except RuntimeError as e:
                if 'not implemented' in str(e):
                    # numpy implements but not arkouda
                    results['numpy_minus_arkouda'].append(
                        (expression, str(e), True))
                else:
                    # both implement, but arkouda errors
                    results['both_implement'].append(
                        (expression, str(e), True, False, False))
                continue
            # both numpy and arkouda execute without error
            try:
                akrestype = akres.dtype
            except Exception:
                warnings.warn(
                    "Cannot detect return dtype of ak result: {} (np result: {})"
                    .format(akres, npres))
                results['both_implement'].append(
                    (expression, str(akres), False, True, False))
                continue
            if akrestype != npres.dtype:
                restypes = "{}(np) vs. {}(ak)".format(npres.dtype, akrestype)
                results['both_implement'].append(
                    (expression, restypes, False, True, False))
                continue
            try:
                akasnp = akres.to_ndarray()
            except Exception:
                warnings.warn("Could not convert to ndarray: {}".format(akres))
                results['both_implement'].append(
                    (expression, str(akres), True, False, False))
                continue
            if not np.allclose(akasnp, npres, equal_nan=True):
                res = "np: {}\nak: {}".format(npres, akasnp)
                results['both_implement'].append(
                    (expression, res, False, False, True))
                # Skip the "agree" record below; without this the same
                # expression would be counted both as a mismatch and a match.
                continue
            # Finally, both numpy and arkouda agree on result
            results['both_implement'].append(
                (expression, "", False, False, False))

    print("# ops not implemented by numpy or arkouda: {}".format(
        len(results['neither_implement'])))
    if verbose:
        for expression, err in results['neither_implement']:
            print(expression)
    print("# ops implemented by numpy but not arkouda: {}".format(
        len(results['numpy_minus_arkouda'])))
    if verbose:
        for expression, err, flag in results['numpy_minus_arkouda']:
            print(expression)
    print("# ops implemented by arkouda but not numpy: {}".format(
        len(results['arkouda_minus_numpy'])))
    if verbose:
        for expression, res, flag in results['arkouda_minus_numpy']:
            print(expression, " -> ", res)
    nboth = len(results['both_implement'])
    print("# ops implemented by both: {}".format(nboth))

    # Split the "both implement" records by failure category; a record
    # counts as a match only when none of its three flags is set.
    matches = 0
    execerrors = []
    dtypeerrors = []
    valueerrors = []
    for (expression, res, ex, dt, val) in results['both_implement']:
        matches += not any((ex, dt, val))
        if ex:
            execerrors.append((expression, res))
        if dt:
            dtypeerrors.append((expression, res))
        if val:
            valueerrors.append((expression, res))
    print(" Matching results: {} / {}".format(matches, nboth))
    print(" Arkouda execution errors: {} / {}".format(len(execerrors), nboth))
    if verbose:
        print('\n'.join(map(': '.join, execerrors)))
    print(" Dtype mismatches: {} / {}".format(len(dtypeerrors), nboth))
    if verbose:
        print('\n'.join(map(': '.join, dtypeerrors)))
    print(" Value mismatches: {} / {}".format(len(valueerrors), nboth))
    if verbose:
        print('\n'.join(map(': '.join, valueerrors)))
    return matches == nboth