def compare_strategies(length, ncat, op, dtype):
    """Time global vs. local GroupBy strategies on random data and compare them.

    Random keys in ``[0, ncat)`` and values of the requested ``dtype`` are
    generated, then grouped and aggregated with ``op`` twice: once with
    ``ak.GroupBy(keys, False)`` and once with ``ak.GroupBy(keys, True)``.
    Each phase's wall-clock time is printed, followed by whether the two
    strategies produced the same keys and the summed absolute difference of
    their aggregated values.

    Returns:
        Tuple ``(ggtime, grtime, lgtime, lrtime)``: seconds spent in the
        global groupby, global reduce, local groupby and local reduce.
    """
    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length // ncat, length)
    elif dtype == 'bool':
        # Start all-False and flip ncat // 2 randomly chosen positions.
        vals = ak.zeros(length, dtype='bool')
        for position in np.random.randint(0, length, ncat // 2):
            vals[position] = True
    else:
        vals = ak.linspace(-1, 1, length)

    def _timed(label, action):
        # Print the label, run the action, print and return its duration.
        print(label, end=' ')
        began = time()
        outcome = action()
        elapsed = time() - began
        print(elapsed)
        return outcome, elapsed

    gg, ggtime = _timed("Global groupby", lambda: ak.GroupBy(keys, False))
    (gk, gv), grtime = _timed("Global reduce", lambda: gg.aggregate(vals, op))
    lg, lgtime = _timed("Local groupby", lambda: ak.GroupBy(keys, True))
    (lk, lv), lrtime = _timed("Local reduce", lambda: lg.aggregate(vals, op))

    print(f"Keys match? {(gk == lk).all()}")
    print(f"Absolute diff of vals = {ak.abs(gv - lv).sum()}")
    return ggtime, grtime, lgtime, lrtime
Exemple #2
0
def check_linspace(N):
    """Check that ``ak.linspace(10, 20, N)`` agrees with numpy's linspace.

    The arkouda result is converted to an ndarray and compared to the numpy
    result with ``np.allclose``; the boolean outcome is handed to
    ``pass_fail`` and that function's result is returned.
    """
    expected = np.linspace(10, 20, N)
    actual = ak.linspace(10, 20, N).to_ndarray()
    return pass_fail(np.allclose(expected, actual))
Exemple #3
0
def check_linspace(N):
    """Check that ``ak.linspace(10, 20, N)`` equals numpy's linspace exactly.

    The numpy result is converted with ``ak.array`` and compared to the
    arkouda result element-wise with ``==`` (exact equality, no tolerance).
    Whether all elements matched is handed to ``pass_fail`` and that
    function's result is returned.
    """
    reference = ak.array(np.linspace(10, 20, N))
    candidate = ak.linspace(10, 20, N)
    return pass_fail((reference == candidate).all())
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """Build a random key array and a value array of the requested dtype.

    Args:
        length: number of elements in both arrays.
        nkeys: exclusive upper bound for the random integer keys.
        nvals: exclusive upper bound for the values when ``dtype`` is
            ``'int64'`` (unused otherwise).
        dtype: ``'int64'`` for random integers, ``'bool'`` for an array
            that is False except at ``nkeys // 2`` randomly drawn positions,
            anything else for ``ak.linspace(-1, 1, length)``.

    Returns:
        Tuple ``(keys, vals)``.
    """
    keys = ak.randint(0, nkeys, length)
    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)
    if dtype != 'bool':
        return keys, ak.linspace(-1, 1, length)
    # Boolean case: start all-False and flip randomly chosen positions.
    vals = ak.zeros(length, dtype='bool')
    for position in np.random.randint(0, length, nkeys // 2):
        vals[position] = True
    return keys, vals
Exemple #5
0
def run_tests():
    """Run every binary operator over all dtype pairs in arkouda and numpy.

    For each (left dtype, right dtype, operator) combination, and for the
    array/array, array/scalar and scalar/array operand layouts, the
    expression is evaluated with numpy and with arkouda and the outcomes are
    compared. Discrepancies are bucketed into ``not_implemented``,
    ``caught``, ``wrong_dtype``, ``wrong_value`` and ``failure`` and a
    summary is printed at the end.

    The operand tables are stored in the module-level globals ``pdarrays``,
    ``ndarrays`` and ``scalars`` because ``do_op`` evaluates expression
    strings that refer to them by name.
    """
    global pdarrays, ndarrays, scalars
    pdarrays = {
        'int64': ak.arange(1, SIZE + 1, 1),
        'float64': ak.linspace(0, 1, SIZE),
        'bool': (ak.arange(0, SIZE, 1) % 2) == 0
    }
    ndarrays = {
        'int64': np.arange(1, SIZE + 1, 1),
        'float64': np.linspace(0, 1, SIZE),
        'bool': (np.arange(0, SIZE, 1) % 2) == 0
    }
    scalars = {'int64': 5, 'float64': 3.14159, 'bool': True}
    dtypes = pdarrays.keys()

    print("Dtypes: ", dtypes)
    for heading, table in (("pdarrays: ", pdarrays),
                           ("ndarrays: ", ndarrays),
                           ("scalars: ", scalars)):
        print(heading)
        for name, value in table.items():
            print(name, ": ", value)

    def do_op(lt, rt, ls, rs, isarkouda, oper):
        # Build e.g. 'pdarrays["int64"] + scalars["bool"]' and evaluate it.
        # The string is assembled only from internal dtype/operator names.
        source = 'pdarrays' if isarkouda else 'ndarrays'
        if ls:
            left = 'scalars["{}"]'.format(lt)
        else:
            left = '{}["{}"]'.format(source, lt)
        if rs:
            right = 'scalars["{}"]'.format(rt)
        else:
            right = '{}["{}"]'.format(source, rt)
        return eval(left + ' {} '.format(oper) + right)

    results = {
        'not_implemented': [],
        'caught': [],
        'wrong_dtype': [],
        'wrong_value': [],
        'failure': []
    }
    kinds = ('array', 'scalar')
    # (left is scalar, right is scalar); scalar/scalar is not exercised.
    layouts = ((False, False), (False, True), (True, False))
    tests = 0
    for ltype, rtype, op in product(dtypes, dtypes, ak.pdarray.BinOps):
        for lscalar, rscalar in layouts:
            tests += 1
            expression = "{}({}) {} {}({})".format(
                ltype, kinds[lscalar], op, rtype, kinds[rscalar])
            operands = (ltype, rtype, lscalar, rscalar)

            try:
                npres = do_op(*operands, False, op)
            except TypeError:
                # numpy rejects the operation; only note it when arkouda
                # rejects it as well.
                try:
                    do_op(*operands, True, op)
                except RuntimeError as err:
                    results['not_implemented'].append((expression, err))
                continue

            try:
                akres = do_op(*operands, True, op)
            except RuntimeError as err:
                warnings.warn("Error computing {}\n{}".format(
                    expression, str(err)))
                results['caught'].append((expression, err))
                continue

            try:
                akrestype = akres.dtype
            except Exception as err:
                warnings.warn(
                    "Cannot detect return dtype of ak result: {} (np result: {})"
                    .format(akres, npres))
                results['failure'].append((expression, err))
                continue

            if akrestype != npres.dtype:
                restypes = "{}(np) vs. {}(ak)".format(npres.dtype, akrestype)
                warnings.warn("dtype mismatch: {} = {}".format(
                    expression, restypes))
                results['wrong_dtype'].append((expression, restypes))
                continue

            try:
                akasnp = akres.to_ndarray()
            except Exception as err:
                warnings.warn("Could not convert to ndarray: {}".format(akres))
                results['failure'].append((expression, err))
                continue

            if np.allclose(akasnp, npres, equal_nan=True):
                continue
            res = "np: {}\nak: {}".format(npres, akasnp)
            warnings.warn("result mismatch: {} =\n{}".format(
                expression, res))
            results['wrong_value'].append((expression, res))

    for errtype, errs in results.items():
        print("{} {}".format(len(errs), errtype))
        for expr, msg in errs:
            print(expr)
            print(msg)
    # Operations that neither library implements are not counted as
    # differences.
    total_recorded = sum(len(errs) for errs in results.values())
    differences = total_recorded - len(results['not_implemented'])
    print("{} differences from numpy in {} tests".format(differences, tests))
Exemple #6
0
# NOTE(review): this fragment continues a longer script; `iv` and `a` are
# defined above the visible portion (presumably `iv` is an index array into
# `a` -- confirm against the preceding code).
print(iv)
# Overwrite the elements of `a` selected by `iv` with int64 zeros.
b = ak.zeros(iv.size,dtype=ak.int64)
a[iv] = b
print(a)

# NOTE(review): `ak.v` looks like a module-level verbosity switch -- confirm.
ak.v = False
# Value counts of 40 random integers; the result is indexed at [0] and [1]
# (presumably unique values and their counts -- confirm).
a = ak.randint(10,30,40)
vc = ak.value_counts(a)
print(vc[0].size,vc[0])
print(vc[1].size,vc[1])

ak.v = False

# Boolean-mask indexing, first on an arange result, then on a linspace
# result; only the second selection is printed.
a = ak.arange(0,10,1)
b = a[a<5]
a = ak.linspace(0,9,10)
b = a[a<5]
print(b)

ak.v = True
# NOTE(review): presumably the size limit up to which a pdarray may be
# iterated client-side -- confirm.
ak.pdarrayIterThresh = 1000
# Iterate a pdarray by converting it to a Python list.
a = ak.arange(0,10,1)
print(list(a))

ak.v = False
# Unique values and a 20-bin histogram of 40 random integers.
a = ak.randint(10,30,40)
u = ak.unique(a)
h = ak.histogram(a,bins=20)
print(a)
print(h.size,h)
print(u.size,u)
Exemple #7
0
def run_tests(verbose):
    """Compare arkouda binary operators against numpy across dtypes.

    For each (left dtype, right dtype, operator) combination, and for the
    array/array, array/scalar and scalar/array operand layouts, the
    expression is evaluated with numpy and with arkouda. Each test lands in
    exactly one bucket:

    * ``neither_implement``: numpy raised ``TypeError`` and arkouda raised a
      ``RuntimeError`` whose message contains ``'not implemented'``.
    * ``arkouda_minus_numpy``: numpy raised ``TypeError`` but arkouda either
      succeeded or failed with some other ``RuntimeError``.
    * ``numpy_minus_arkouda``: numpy succeeded and arkouda raised a
      ``'not implemented'`` ``RuntimeError``.
    * ``both_implement``: everything else, recorded as
      ``(expression, detail, exec_error, dtype_mismatch, value_mismatch)``.

    The operand tables are stored in the module-level globals ``pdarrays``,
    ``ndarrays`` and ``scalars`` because ``do_op`` evaluates expression
    strings that refer to them by name.

    Args:
        verbose: when truthy, also print the operand tables and the
            individual expressions in each bucket.

    Returns:
        True when every operation implemented by both libraries produced a
        matching result (no execution error, dtype or value mismatch).
    """
    global pdarrays
    pdarrays = {
        'int64': ak.arange(0, SIZE, 1),
        'float64': ak.linspace(0, 2, SIZE),
        'bool': (ak.arange(0, SIZE, 1) % 2) == 0
    }
    global ndarrays
    ndarrays = {
        'int64': np.arange(0, SIZE, 1),
        'float64': np.linspace(0, 2, SIZE),
        'bool': (np.arange(0, SIZE, 1) % 2) == 0
    }
    global scalars
    scalars = {'int64': 5, 'float64': 3.14159, 'bool': True}
    dtypes = pdarrays.keys()
    if verbose:
        print("Operators: ", ak.pdarray.BinOps)
        print("Dtypes: ", dtypes)
        print("pdarrays: ")
        for k, v in pdarrays.items():
            print(k, ": ", v)
        print("ndarrays: ")
        for k, v in ndarrays.items():
            print(k, ": ", v)
        print("scalars: ")
        for k, v in scalars.items():
            print(k, ": ", v)

    def do_op(lt, rt, ls, rs, isarkouda, oper):
        # Build e.g. 'pdarrays["int64"] + scalars["bool"]' and evaluate it.
        # The string is assembled only from internal dtype/operator names,
        # never from external input.
        evalstr = ''
        if ls:
            evalstr += 'scalars["{}"]'.format(lt)
        else:
            evalstr += '{}["{}"]'.format(('ndarrays', 'pdarrays')[isarkouda],
                                         lt)
        evalstr += ' {} '.format(oper)
        if rs:
            evalstr += 'scalars["{}"]'.format(rt)
        else:
            evalstr += '{}["{}"]'.format(('ndarrays', 'pdarrays')[isarkouda],
                                         rt)
        return eval(evalstr)

    results = {
        'neither_implement': [],  # (expression, ak_error)
        'arkouda_minus_numpy': [],  # (expression, ak_result, error_on_exec?)
        'numpy_minus_arkouda': [],  # (expression, ak_result, error_on_exec?)
        # (expression, ak_result, error_on_exec?, dtype_mismatch?,
        #  value_mismatch?)
        'both_implement': []
    }
    tests = 0
    for ltype, rtype, op in product(dtypes, dtypes, ak.pdarray.BinOps):
        # Operand layouts as (left is scalar, right is scalar);
        # scalar/scalar is not exercised.
        for lscalar, rscalar in ((False, False), (False, True), (True, False)):
            tests += 1
            expression = "{}({}) {} {}({})".format(ltype, ('array',
                                                           'scalar')[lscalar],
                                                   op, rtype,
                                                   ('array',
                                                    'scalar')[rscalar])
            try:
                npres = do_op(ltype, rtype, lscalar, rscalar, False, op)
            except TypeError:  # numpy doesn't implement operation
                try:
                    akres = do_op(ltype, rtype, lscalar, rscalar, True, op)
                except RuntimeError as e:
                    if 'not implemented' in str(e):
                        # neither numpy nor arkouda implement
                        results['neither_implement'].append(
                            (expression, str(e)))
                    else:
                        # arkouda implements with error, np does not implement
                        results['arkouda_minus_numpy'].append(
                            (expression, str(e), True))
                    continue
                # arkouda implements but not numpy
                results['arkouda_minus_numpy'].append(
                    (expression, str(akres), False))
                continue
            try:
                akres = do_op(ltype, rtype, lscalar, rscalar, True, op)
            except RuntimeError as e:
                if 'not implemented' in str(e):
                    # numpy implements but not arkouda
                    results['numpy_minus_arkouda'].append(
                        (expression, str(e), True))
                else:
                    # both implement, but arkouda errors
                    results['both_implement'].append(
                        (expression, str(e), True, False, False))
                continue
            # both numpy and arkouda execute without error
            try:
                akrestype = akres.dtype
            except Exception:
                warnings.warn(
                    "Cannot detect return dtype of ak result: {} (np result: {})"
                    .format(akres, npres))
                # A result without a dtype is reported as a dtype mismatch.
                results['both_implement'].append(
                    (expression, str(akres), False, True, False))
                continue

            if akrestype != npres.dtype:
                restypes = "{}(np) vs. {}(ak)".format(npres.dtype, akrestype)
                results['both_implement'].append(
                    (expression, restypes, False, True, False))
                continue
            try:
                akasnp = akres.to_ndarray()
            except Exception:
                warnings.warn("Could not convert to ndarray: {}".format(akres))
                # A failed conversion is reported as an execution error.
                results['both_implement'].append(
                    (expression, str(akres), True, False, False))
                continue
            if not np.allclose(akasnp, npres, equal_nan=True):
                res = "np: {}\nak: {}".format(npres, akasnp)
                results['both_implement'].append(
                    (expression, res, False, False, True))
                # Skip the "match" record below; otherwise the mismatching
                # expression would also be counted as a clean match.
                continue
            # Finally, both numpy and arkouda agree on result
            results['both_implement'].append(
                (expression, "", False, False, False))

    print("# ops not implemented by numpy or arkouda: {}".format(
        len(results['neither_implement'])))
    if verbose:
        for expression, err in results['neither_implement']:
            print(expression)
    print("# ops implemented by numpy but not arkouda: {}".format(
        len(results['numpy_minus_arkouda'])))
    if verbose:
        for expression, err, flag in results['numpy_minus_arkouda']:
            print(expression)
    print("# ops implemented by arkouda but not numpy: {}".format(
        len(results['arkouda_minus_numpy'])))
    if verbose:
        for expression, res, flag in results['arkouda_minus_numpy']:
            print(expression, " -> ", res)
    nboth = len(results['both_implement'])
    print("# ops implemented by both: {}".format(nboth))
    matches = 0
    execerrors = []
    dtypeerrors = []
    valueerrors = []
    for (expression, res, ex, dt, val) in results['both_implement']:
        # A record with no flag set is a clean match.
        matches += not any((ex, dt, val))
        if ex:
            execerrors.append((expression, res))
        if dt:
            dtypeerrors.append((expression, res))
        if val:
            valueerrors.append((expression, res))
    print("  Matching results:         {} / {}".format(matches, nboth))
    print("  Arkouda execution errors: {} / {}".format(len(execerrors), nboth))
    if verbose:
        print('\n'.join(map(': '.join, execerrors)))
    print("  Dtype mismatches:         {} / {}".format(len(dtypeerrors),
                                                       nboth))
    if verbose:
        print('\n'.join(map(': '.join, dtypeerrors)))
    print("  Value mismatches:         {} / {}".format(len(valueerrors),
                                                       nboth))
    if verbose:
        print('\n'.join(map(': '.join, valueerrors)))
    return matches == nboth