Esempio n. 1
0
def check_correctness():
    """Verify that flatten() undoes stick() for literal and regex delimiters.

    Three interleaved integer ranges (start 0, 1 and 2, step 3) are cast to
    strings and stuck together with '_' as the delimiter. Flattening the
    result on that delimiter is asserted to equal the stringified range
    0 .. 3*N - 1, for each supported way of specifying the delimiter.
    """
    N = 10**4
    total = N * 3

    # One stringified strided range per starting offset.
    first, second, third = (
        ak.cast(ak.arange(offset, total, 3), 'str') for offset in range(3)
    )
    glued = first.stick(second, delimiter='_')
    thickrange = glued.stick(third, delimiter='_')

    expected = ak.cast(ak.arange(total), 'str')

    # (delimiter, extra keyword arguments) for each flatten variant checked:
    # plain literal, literal treated as a regex, and a regex pattern.
    variants = (
        ('_', {}),
        ('_', {'regex': True}),
        ('_+', {'regex': True}),
    )
    for delimiter, options in variants:
        flattened = thickrange.flatten(delimiter, **options)
        assert (flattened == expected).all()
Esempio n. 2
0
def time_flatten(N, trials):
    """Benchmark Strings.flatten with literal and regex delimiters.

    Builds a Strings array by sticking three interleaved stringified ranges
    together with '_', then times three flatten variants per trial:
    non-regex with a literal delimiter, regex with a literal delimiter, and
    regex with a pattern delimiter ('_+'). The last result of each variant is
    asserted to equal the stringified range 0 .. 3*N - 1, and the average
    time and rate (GiB/sec) of every variant are printed.

    Parameters
    ----------
    N : int
        Length of each of the three strided ranges that are stuck together.
    trials : int
        Number of timed repetitions of each variant; averages divide by it.
    """
    print(">>> arkouda flatten")
    config = ak.get_config()
    print("numLocales = {}, N = {:,}".format(config["numLocales"], N))

    first, second, third = (
        ak.cast(ak.arange(offset, N*3, 3), 'str') for offset in range(3)
    )
    thickrange = first.stick(second, delimiter='_').stick(third, delimiter='_')
    # Byte count that the reported rates are based on.
    nbytes = thickrange.nbytes * thickrange.entry.itemsize

    def _timed_flatten(*args, **kwargs):
        # Run one flatten call and return (result, elapsed wall-clock seconds).
        began = time.time()
        result = thickrange.flatten(*args, **kwargs)
        finished = time.time()
        return result, finished - began

    non_regex_secs = []
    regex_literal_secs = []
    regex_pattern_secs = []
    for _ in range(trials):
        non_regex, elapsed = _timed_flatten('_')
        non_regex_secs.append(elapsed)

        regex_literal, elapsed = _timed_flatten('_', regex=True)
        regex_literal_secs.append(elapsed)

        regex_pattern, elapsed = _timed_flatten('_+', regex=True)
        regex_pattern_secs.append(elapsed)

    avg_non_regex = sum(non_regex_secs) / trials
    avg_regex_literal = sum(regex_literal_secs) / trials
    avg_regex_pattern = sum(regex_pattern_secs) / trials

    # Check the results produced by the final trial of each variant.
    expected = ak.cast(ak.arange(N*3), 'str')
    assert (non_regex == expected).all()
    assert (regex_literal == expected).all()
    assert (regex_pattern == expected).all()

    print("non-regex flatten with literal delimiter Average time = {:.4f} sec".format(avg_non_regex))
    print("regex flatten with literal delimiter Average time = {:.4f} sec".format(avg_regex_literal))
    print("regex flatten with pattern delimiter Average time = {:.4f} sec".format(avg_regex_pattern))

    gib = nbytes/2**30
    print("non-regex flatten with literal delimiter Average rate = {:.4f} GiB/sec".format(gib/avg_non_regex))
    print("regex flatten with literal delimiter Average rate = {:.4f} GiB/sec".format(gib/avg_regex_literal))
    print("regex flatten with pattern delimiter Average rate = {:.4f} GiB/sec".format(gib/avg_regex_pattern))
Esempio n. 3
0
def power_law(N):
    '''
    Generate power-law distributed (alpha = 2.5) test data in (1, 2**32).

    Yields two (label, array) pairs lazily: first the float64 values, then
    the same values cast to int64. N is the number of uniform samples drawn
    and transformed.
    '''
    exponent = -2.5  # power law exponent, between -2 and -3
    upper = 2**32  # upper bound
    shifted = exponent + 1

    # Transform uniform samples into power-law distributed values.
    samples = ak.uniform(N)
    reals = ((upper**shifted - 1) * samples + 1)**(1 / shifted)
    yield 'power-law float64', reals

    # The integer variant is only computed if the consumer asks for it.
    yield 'power-law int64', ak.cast(reals, ak.int64)
Esempio n. 4
0
def check_correctness(dtype, seed):
    """Sanity-check ak.coargsort on a random key paired with a constant key.

    A random array ``a`` of the requested dtype and an all-zero companion
    ``z`` are co-sorted in both key orders ([a, z] and [z, a]). For numeric
    dtypes the permutation applied to ``a`` is asserted to be sorted; for
    'str' the sorts are only executed, not verified.

    Parameters
    ----------
    dtype : str
        One of 'int64', 'float64' or 'str'.
    seed : int or None
        Seed forwarded to the random data generator.

    Raises
    ------
    ValueError
        If ``dtype`` is not one of the supported values. Previously this
        fell through the if/elif chain and failed later with an
        UnboundLocalError because ``a`` and ``z`` were never assigned.
    AssertionError
        If a numeric result is not sorted.
    """
    N = 10**4
    if dtype == 'int64':
        a = ak.randint(0, 2**32, N, seed=seed)
        z = ak.zeros(N, dtype=dtype)
    elif dtype == 'float64':
        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
        z = ak.zeros(N, dtype=dtype)
    elif dtype == 'str':
        a = ak.random_strings_uniform(1, 16, N, seed=seed)
        z = ak.cast(ak.zeros(N), 'str')
    else:
        raise ValueError(
            "unsupported dtype {!r}; expected 'int64', 'float64' or 'str'".format(dtype)
        )

    # Since z is constant, either key order must order the rows by a alone.
    for keys in ([a, z], [z, a]):
        perm = ak.coargsort(keys)
        # Sortedness is only verified for numeric keys.
        if dtype in ('int64', 'float64'):
            assert ak.is_sorted(a[perm])
Esempio n. 5
0
def time_ak_gather(isize, vsize, trials, dtype, random, seed):
    """Benchmark the arkouda gather operation ``v[i]``.

    A random int64 index vector ``i`` and a value array ``v`` of the given
    dtype are created, the gather is timed ``trials`` times, and the average
    time and transfer rate (GiB/sec) are printed.

    Parameters
    ----------
    isize : int
        Number of indices per locale; scaled by the server's numLocales.
    vsize : int
        Number of values per locale; scaled by the server's numLocales.
    trials : int
        Number of timed repetitions; the average divides by it.
    dtype : str
        Value dtype. Random values support 'int64', 'float64', 'bool' and
        'str'; otherwise the dtype is passed through to ``ak.ones`` (or, for
        'str', a stringified arange is used).
    random : bool
        Fill the value array with random data instead of constant data.
    seed : int or None
        Seed for the index vector; ``seed + 1`` is used for the values. A
        non-None seed also selects random values, even when ``random`` is
        false.

    Raises
    ------
    ValueError
        If random values are requested for an unsupported dtype. Previously
        this left ``v`` unassigned and only failed with an UnboundLocalError
        inside the timing loop, after the index vector had been generated.
    """
    use_random_values = random or seed is not None
    if use_random_values and dtype not in ('int64', 'float64', 'bool', 'str'):
        raise ValueError(
            "unsupported dtype {!r} for random gather values; "
            "expected 'int64', 'float64', 'bool' or 'str'".format(dtype)
        )

    print(">>> arkouda {} gather".format(dtype))
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni, seed=seed)
    if seed is not None:
        # Use a different seed for the values than for the indices.
        seed += 1
    if use_random_values:
        if dtype == 'int64':
            v = ak.randint(0, 2**32, Nv, seed=seed)
        elif dtype == 'float64':
            v = ak.randint(0, 1, Nv, dtype=ak.float64, seed=seed)
        elif dtype == 'bool':
            v = ak.randint(0, 1, Nv, dtype=ak.bool, seed=seed)
        elif dtype == 'str':
            v = ak.random_strings_uniform(1, 16, Nv, seed=seed)
    else:
        if dtype == 'str':
            v = ak.cast(ak.arange(Nv), 'str')
        else:
            v = ak.ones(Nv, dtype=dtype)

    timings = []
    for _ in range(trials):
        start = time.time()
        c = v[i]
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if dtype == 'str':
        # Strings results are accounted as offsets plus raw bytes.
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        # NOTE(review): the factor of 3 presumably counts index read, value
        # read and result write -- confirm against the benchmark's definition.
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30))