Beispiel #1
0
 def test_plus_minus(self):
     """Check result types and values of + and - between Datetime/Timedelta operands.

     Uses the attributes ``dtvec1``, ``dtvec2``, ``dtscalar``, ``tdvec1`` and
     ``onesecond`` of the test instance. Each section below exercises one
     operand combination and asserts both the class of the result and its
     element-wise value.
     """
     # Datetime + Datetime not supported
     with self.assertRaises(TypeError):
         self.dtvec1 + self.dtvec2
     # Datetime slice -> Datetime
     leading = self.dtvec1[1:]
     trailing = self.dtvec1[:-1]
     self.assertTrue(isinstance(leading, ak.Datetime) and isinstance(trailing, ak.Datetime))
     # Datetime - Datetime -> Timedelta
     diff = leading - trailing
     self.assertTrue(isinstance(diff, ak.Timedelta))
     self.assertTrue((diff == self.onesecond).all())
     # Datetime - DatetimeScalar -> Timedelta
     diff = self.dtvec1 - self.dtscalar
     trange = ak.timedelta_range(start=0, periods=100, freq='s')
     self.assertTrue(isinstance(diff, ak.Timedelta))
     self.assertTrue((diff == trange).all())
     # DatetimeScalar - Datetime -> Timedelta
     diff = self.dtscalar - self.dtvec1
     self.assertTrue(isinstance(diff, ak.Timedelta))
     self.assertTrue((diff == (-trange)).all())
     # Datetime + TimedeltaScalar -> Datetime
     t = (trailing + self.onesecond)
     self.assertTrue(isinstance(t, ak.Datetime))
     self.assertTrue((t == leading).all())
     # TimedeltaScalar + Datetime -> Datetime
     t = (self.onesecond + trailing)
     self.assertTrue(isinstance(t, ak.Datetime))
     self.assertTrue((t == leading).all())
     # Datetime - TimedeltaScalar -> Datetime
     t = leading - self.onesecond
     self.assertTrue(isinstance(t, ak.Datetime))
     self.assertTrue((t == trailing).all())
     # Datetime + Timedelta -> Datetime
     t = (trailing + self.tdvec1[1:])
     self.assertTrue(isinstance(t, ak.Datetime))
     self.assertTrue((t == leading).all())
     # Timedelta + Datetime -> Datetime
     t = (self.tdvec1[1:] + trailing)
     self.assertTrue(isinstance(t, ak.Datetime))
     self.assertTrue((t == leading).all())
     # Datetime - Timedelta -> Datetime
     t = (leading - self.tdvec1[1:])
     self.assertTrue(isinstance(t, ak.Datetime))
     # Value check mirrors the "Datetime + Timedelta" case above: removing the
     # same offsets from `leading` must give back `trailing`.
     self.assertTrue((t == trailing).all())
     # Timedelta + Timedelta -> Timedelta
     t = self.tdvec1 + self.tdvec1
     self.assertTrue(isinstance(t, ak.Timedelta))
     self.assertTrue((t == ak.Timedelta(2*ak.ones(100, dtype=ak.int64), unit='s')).all())
     # Timedelta + TimedeltaScalar -> Timedelta
     t = self.tdvec1 + self.onesecond
     self.assertTrue(isinstance(t, ak.Timedelta))
     self.assertTrue((t == ak.Timedelta(2*ak.ones(100, dtype=ak.int64), unit='s')).all())
     # Timedelta - Timedelta -> Timedelta
     t = self.tdvec1 - self.tdvec1
     self.assertTrue(isinstance(t, ak.Timedelta))
     self.assertTrue((t == ak.Timedelta(ak.zeros(100, dtype=ak.int64), unit='s')).all())
     # Timedelta - TimedeltaScalar -> Timedelta
     t = self.tdvec1 - self.onesecond
     self.assertTrue(isinstance(t, ak.Timedelta))
     self.assertTrue((t == ak.Timedelta(ak.zeros(100, dtype=ak.int64), unit='s')).all())
Beispiel #2
0
def rmat(size):
    '''
    RMAT-generated edges (coargsort of two vertex arrays).

    Parameters
    ----------
    size : int
        Total number of elements; half of it is used as the edge count.

    Yields
    ------
    tuple
        A single ``('RMAT int64', (ii, jj))`` item, where ``ii`` and ``jj``
        hold one endpoint each per generated edge.
    '''
    # two endpoint arrays share `size` elements, so each gets size // 2
    num_edges = size // 2
    avg_degree = 10
    lg_nv = int(np.log2(num_edges / avg_degree))
    # quadrant probabilities: `a` is fixed, the other three split the rest
    a = 0.01
    b = c = d = (1.0 - a) / 3.0
    # quantities reused on every pass of the edge generation loop
    ab = a + b
    c_norm = c / (c + d)
    a_norm = a / (a + b)
    # both endpoint arrays start as all ones
    src = ak.ones(num_edges, dtype=ak.int64)
    dst = ak.ones(num_edges, dtype=ak.int64)
    # each pass randomly adds one power-of-two contribution to every endpoint
    for ib in range(1, lg_nv):
        weight = 2**(ib - 1)
        src_bit = ak.uniform(num_edges) > ab
        dst_bit = ak.uniform(num_edges) > (c_norm * src_bit + a_norm * (~src_bit))
        src = src + (weight * src_bit)
        dst = dst + (weight * dst_bit)

    yield 'RMAT int64', (src, dst)
Beispiel #3
0
    def get_ngrams(self, n, return_origins=True):
        """
        Return all n-grams from all sub-arrays.

        Parameters
        ----------
        n : int
            Length of n-gram
        return_origins : bool
            If True, return an int64 array indicating which sub-array
            each returned n-gram came from.

        Returns
        -------
        ngrams : list of pdarray
            An n-long list of pdarrays, essentially a table where each row is an n-gram.
        origin_indices : pdarray, int
            The index of the sub-array from which the corresponding n-gram originated
            (only returned when ``return_origins`` is True)
        """
        # Number of candidate window start positions in the flat values array.
        num_windows = self.valsize - n + 1
        # True everywhere except at the first element of each sub-array.
        interior = ak.ones(self.valsize, dtype=ak.bool)
        interior[self.segments] = False
        # Column i holds the i-th element of every candidate window.
        columns = [self.values[i:num_windows + i] for i in range(n)]
        # A window is kept only if none of its positions after the first one
        # is the start of a sub-array, i.e. it does not cross a boundary.
        valid = ak.ones(num_windows, dtype=ak.bool)
        for i in range(1, n):
            valid &= interior[i:num_windows + i]
        ngrams = [column[valid] for column in columns]
        if not return_origins:
            return ngrams
        origin_indices = self.grouping.broadcast(
            ak.arange(self.size), permute=True)[:valid.size][valid]
        return ngrams, origin_indices
Beispiel #4
0
def time_ak_stream(N_per_locale, trials, alpha, dtype, random):
    """Time the expression ``a + b * alpha`` on arkouda arrays and print the results.

    Parameters
    ----------
    N_per_locale : int
        Elements per locale; multiplied by the server's ``numLocales``.
    trials : int
        Number of timed repetitions that are averaged.
    alpha : scalar
        Multiplier applied to ``b``.
    dtype : str
        Element type. With ``random`` set, only 'int64' and 'float64' are handled.
    random : bool
        If true, fill the inputs with random values; otherwise with ones.

    Raises
    ------
    ValueError
        If ``random`` is true and ``dtype`` is neither 'int64' nor 'float64'.
    """
    print(">>> arkouda stream")
    cfg = ak.get_config()
    N = N_per_locale * cfg["numLocales"]
    print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
    if random:
        if dtype == 'int64':
            a = ak.randint(0, 2**32, N)
            b = ak.randint(0, 2**32, N)
        elif dtype == 'float64':
            a = ak.randint(0, 1, N, dtype=ak.float64)
            b = ak.randint(0, 1, N, dtype=ak.float64)
        else:
            # Fail here with a clear message instead of an UnboundLocalError
            # on `a` further down.
            raise ValueError("unsupported dtype for random input: {}".format(dtype))
    else:
        a = ak.ones(N, dtype=dtype)
        b = ak.ones(N, dtype=dtype)

    timings = []
    for _ in range(trials):
        start = time.time()
        c = a + b * alpha
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    # Factor 3: presumably two arrays read and one written per element.
    bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Beispiel #5
0
 def check_equal(pdunit, akunit):
     """Compare pandas and arkouda interpretations of a time unit.

     Builds the value 1 in ``pdunit`` with pandas and in ``akunit`` with
     arkouda, first as a timestamp and then as a timedelta. A mismatch is
     logged as an error rather than raised.
     """
     constructor_pairs = (
         (pd.Timestamp, ak.Datetime),
         (pd.Timedelta, ak.Timedelta),
     )
     for make_pd, make_ak in constructor_pairs:
         pdval = make_pd(1, unit=pdunit)
         akval = make_ak(ak.ones(10, dtype=ak.int64), unit=akunit)[0]
         try:
             self.assertEqual(pdval, akval)
         except AssertionError:
             logging.getLogger().error("pandas {} ({}) != arkouda {} ({})".format(pdunit, pdval, akunit, akval))
Beispiel #6
0
def time_ak_scatter(isize, vsize, trials, dtype, random):
    """Time the scatter assignment ``c[i] = v`` on arkouda arrays and print the results.

    Parameters
    ----------
    isize : int
        Number of indices per locale; multiplied by the server's ``numLocales``.
    vsize : int
        Size of the destination array per locale; multiplied by ``numLocales``.
    trials : int
        Number of timed repetitions that are averaged.
    dtype : str
        Element type of the destination and the scattered values. With
        ``random`` set, only 'int64' and 'float64' are handled.
    random : bool
        If true, scatter random values; otherwise scatter ones.

    Raises
    ------
    ValueError
        If ``random`` is true and ``dtype`` is neither 'int64' nor 'float64'.
    """
    print(">>> arkouda scatter")
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(
        cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni)
    c = ak.zeros(Nv, dtype=dtype)
    if random:
        if dtype == 'int64':
            v = ak.randint(0, 2**32, Ni)
        elif dtype == 'float64':
            v = ak.randint(0, 1, Ni, dtype=ak.float64)
        else:
            # Fail here with a clear message instead of an UnboundLocalError
            # on `v` inside the timing loop.
            raise ValueError("unsupported dtype for random input: {}".format(dtype))
    else:
        v = ak.ones(Ni, dtype=dtype)

    timings = []
    for _ in range(trials):
        start = time.time()
        c[i] = v
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    # The rate is derived from the index array's size and item size.
    bytes_per_sec = (i.size * i.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec / 2**30))
Beispiel #7
0
 def __eq__(self, v):
     """Element-wise comparison of every level against a sequence of values.

     Parameters
     ----------
     v : list or tuple
         One comparison value per entry of ``self.index``. Entries are paired
         with ``zip``, so surplus entries on either side are ignored.

     Returns
     -------
     A boolean array of length ``len(self)`` that is True where all paired
     comparisons hold.

     Raises
     ------
     TypeError
         If ``v`` is not a list or tuple (subclasses are accepted).
     """
     # isinstance also admits list/tuple subclasses such as namedtuples,
     # which the exact type() comparison used to reject.
     if not isinstance(v, (list, tuple)):
         raise TypeError("Cannot compare MultiIndex to a scalar")
     retval = ak.ones(len(self), dtype=ak.bool)
     for a, b in zip(self.index, v):
         retval &= (a == b)
     return retval
Beispiel #8
0
def gen_ranges(starts, ends, stride=1):
    """ Generate a segmented array of variable-length, contiguous ranges between pairs of start- and end-points.

    Parameters
    ----------
    starts : pdarray, int64
        The start value of each range
    ends : pdarray, int64
        The end value (exclusive) of each range
    stride: int
        Difference between successive elements of each range

    Returns
    -------
    segments : pdarray, int64
        The starting index of each range in the resulting array
    ranges : pdarray, int64
        The actual ranges, flattened into a single array

    Raises
    ------
    ValueError
        If starts and ends differ in size, if stride is zero, or if any
        range would contain no elements.
    """
    if starts.size != ends.size:
        raise ValueError("starts and ends must be same length")
    if starts.size == 0:
        return ak.zeros(0, dtype=ak.int64), ak.zeros(0, dtype=ak.int64)
    if stride == 0:
        raise ValueError("stride must be nonzero")
    # Ceiling division (for either sign of stride) so that a final partial
    # step still contributes an element, as with Python's range().
    round_up = stride - 1 if stride > 0 else stride + 1
    lengths = (ends - starts + round_up) // stride
    # The cumsum construction below needs distinct segment starts, which
    # only holds when every range has at least one element.
    if not (lengths > 0).all():
        raise ValueError("every range must contain at least one element")
    segs = ak.cumsum(lengths) - lengths
    totlen = lengths.sum()
    # Inside a range, consecutive elements differ by `stride`.
    slices = stride * ak.ones(totlen, dtype=ak.int64)
    # At each range start, jump from the last element of the previous range
    # (starts[k-1] + (lengths[k-1] - 1) * stride) to starts[k].
    diffs = ak.concatenate(
        (ak.array([starts[0]]),
         starts[1:] - starts[:-1] - (lengths[:-1] - 1) * stride))
    slices[segs] = diffs
    return segs, ak.cumsum(slices)
Beispiel #9
0
def gen_ranges(starts, ends):
    """ Generate a segmented array of variable-length, contiguous 
    ranges between pairs of start- and end-points.

    Parameters
    ----------
    starts : pdarray, int64
        The start value of each range
    ends : pdarray, int64
        The end value (exclusive) of each range

    Returns
    -------
    segments : pdarray, int64
        The starting index of each range in the resulting array
    ranges : pdarray, int64
        The actual ranges, flattened into a single array

    Raises
    ------
    ValueError
        If starts and ends differ in size, or if any end is not greater
        than its start.
    """
    if starts.size != ends.size:
        raise ValueError("starts and ends must be same size")
    # No ranges requested: return empty results instead of failing on
    # starts[0] below.
    if starts.size == 0:
        return ak.zeros(0, dtype=ak.int64), ak.zeros(0, dtype=ak.int64)
    if not ((ends - starts) > 0).all():
        raise ValueError("all ends must be greater than starts")
    lengths = ends - starts
    segs = ak.cumsum(lengths) - lengths
    totlen = lengths.sum()
    # Inside a range, consecutive elements differ by one.
    slices = ak.ones(totlen, dtype=ak.int64)
    # At each range start, jump from the last element of the previous range
    # (starts[k-1] + lengths[k-1] - 1) to starts[k].
    diffs = ak.concatenate(
        (ak.array([starts[0]]), starts[1:] - starts[:-1] - lengths[:-1] + 1))
    slices[segs] = diffs
    return segs, ak.cumsum(slices)
Beispiel #10
0
 def setUp(self):
     """Create the Datetime/Timedelta vectors and pandas scalars used as test attributes."""
     ArkoudaTest.setUp(self)
     first_instant = '2021-01-01 12:00:00'
     count = 100
     # Two Datetime vectors built from the same start, once via arkouda and
     # once via a pandas date range.
     self.dtvec1 = ak.date_range(start=first_instant, periods=count, freq='s')
     self.dtvec2 = ak.Datetime(pd.date_range(first_instant, periods=count, freq='s'))
     self.dtscalar = pd.Timestamp(first_instant)
     # Two Timedelta vectors, one from a range and one from an array of ones.
     self.tdvec1 = ak.timedelta_range(start='1 second', end='1 second', periods=count)
     self.tdvec2 = ak.Timedelta(ak.ones(count, dtype=ak.int64), unit='s')
     self.onesecond = pd.Timedelta(1, unit='s')
Beispiel #11
0
def create_ak_array(N, op, dtype, seed):
    """Create an arkouda array of ``N`` elements with the requested creation op.

    Parameters
    ----------
    N : int
        Number of elements.
    op : str
        One of 'zeros', 'ones' or 'randint'.
    dtype
        Element type passed through to the arkouda creation function.
    seed
        Seed passed to ``ak.randint``; only used when ``op`` is 'randint'.

    Returns
    -------
    The array returned by the selected arkouda creation function.

    Raises
    ------
    ValueError
        If ``op`` is not one of the supported names.
    """
    if op == 'zeros':
        return ak.zeros(N, dtype=dtype)
    if op == 'ones':
        return ak.ones(N, dtype=dtype)
    if op == 'randint':
        return ak.randint(0, 2**32, N, dtype=dtype, seed=seed)
    # Previously an unknown op fell through to `return a` with `a` unbound.
    raise ValueError("unsupported op: {!r}".format(op))
Beispiel #12
0
def is_cosorted(data):
    """Return whether the rows formed by the arrays in ``data`` are lexicographically sorted.

    ``data[0]`` is the most significant column. Adjacent rows that are equal
    in every column count as sorted.
    """
    # in_order[k] says whether rows k and k+1 are ordered with respect to
    # the columns handled so far; with no columns handled every pair is.
    in_order = ak.ones(data[0].size - 1, dtype=ak.bool)
    # Walk from the least to the most significant column: a pair is ordered
    # if this column strictly increases, or ties and the later columns agree.
    for column in reversed(data):
        following = column[1:]
        preceding = column[:-1]
        in_order = (following > preceding) | ((following == preceding) & in_order)
    return in_order.all()
Beispiel #13
0
def testPdArraySubtractNumpyInt():
    """Subtraction between a ones pdarray and a numpy int64, in both operand orders."""
    ones_array = ak.ones(100)
    two = np.int64(2)

    # pdarray on the left: 1 - 2
    array_minus_scalar = ones_array - two
    assert isinstance(array_minus_scalar, ak.pdarray)
    assert np.float64(-1) == array_minus_scalar[0]

    # pdarray on the right: 2 - 1
    scalar_minus_array = two - ones_array
    assert isinstance(scalar_minus_array, ak.pdarray)
    assert np.float64(1) == scalar_minus_array[0]
Beispiel #14
0
def check_ones(N):
    """Compare ``ak.ones(N)`` with an arkouda array built from ``np.ones(N)``.

    Returns whatever ``pass_fail`` returns for the all-elements-equal flag.
    """
    # reference array: numpy ones transferred to arkouda
    from_numpy = ak.array(np.ones(N))
    # array under test: created by arkouda itself
    native = ak.ones(N)
    elementwise_equal = from_numpy == native
    return pass_fail(elementwise_equal.all())
Beispiel #15
0
def testPdArraySubtractInt():
    """Subtraction between a ones pdarray and a Python int, in both operand orders."""
    ones_array = ak.ones(100)

    # pdarray on the left: 1 - 2
    array_minus_scalar = ones_array - 2
    assert isinstance(array_minus_scalar, ak.pdarray)
    assert np.float64(-1) == array_minus_scalar[0]

    # pdarray on the right: 2 - 1
    scalar_minus_array = 2 - ones_array
    assert isinstance(scalar_minus_array, ak.pdarray)
    assert np.float64(1) == scalar_minus_array[0]
Beispiel #16
0
def testPdArrayMultInt():
    """Multiplication of a ones pdarray by a Python int, in both operand orders."""
    ones_array = ak.ones(100)

    # pdarray on the left
    array_times_scalar = ones_array*5
    assert isinstance(array_times_scalar, ak.pdarray)
    assert np.float64(5) == array_times_scalar[0]

    # pdarray on the right
    scalar_times_array = 5*ones_array
    assert isinstance(scalar_times_array, ak.pdarray)
    assert np.float64(5) == scalar_times_array[0]
Beispiel #17
0
def testPdArrayMultNumpyInt():
    """Multiplication of a ones pdarray by a numpy int64, in both operand orders."""
    ones_array = ak.ones(100)

    # pdarray on the left
    array_times_scalar = ones_array*np.int64(5)
    assert isinstance(array_times_scalar, ak.pdarray)
    assert np.float64(5) == array_times_scalar[0]

    # pdarray on the right
    scalar_times_array = np.int64(5)*ones_array
    assert isinstance(scalar_times_array, ak.pdarray)
    assert np.float64(5) == scalar_times_array[0]
Beispiel #18
0
def testPdArrayDivideInt():
    """Division of a scaled ones pdarray by a Python int, with the scaling in both operand orders."""
    ones_array = ak.ones(100)

    # scale with the pdarray on the left, then divide
    quotient = ones_array*15/3
    assert isinstance(quotient, ak.pdarray)
    assert np.float64(5) == quotient[0]

    # scale with the pdarray on the right, then divide
    quotient = 15*ones_array/3
    assert isinstance(quotient, ak.pdarray)
    assert np.float64(5) == quotient[0]
Beispiel #19
0
def testPdArrayDivideNumpyInt():
    """Division of a ones pdarray scaled by a numpy int64, with the scaling in both operand orders."""
    ones_array = ak.ones(100)

    # scale with the pdarray on the left, then divide
    quotient = ones_array*np.int64(15)/3
    assert isinstance(quotient, ak.pdarray)
    assert np.float64(5) == quotient[0]

    # scale with the pdarray on the right, then divide
    quotient = np.int64(15)*ones_array/3
    assert isinstance(quotient, ak.pdarray)
    assert np.float64(5) == quotient[0]
Beispiel #20
0
def check_set_slice(N):
    """Check strided slice assignment on an arkouda array against numpy.

    Every second element of an all-ones array is negated in both libraries;
    the arkouda result is converted to an ndarray for the comparison.
    """
    # numpy reference
    expected = np.ones(N)
    expected[::2] = expected[::2] * -1
    # same operation through arkouda
    actual = ak.ones(N)
    actual[::2] = actual[::2] * -1
    elementwise_equal = expected == actual.to_ndarray()
    return pass_fail(elementwise_equal.all())
Beispiel #21
0
def testPdArrayAddInt():
    """Addition of a ones pdarray and a Python int, in both operand orders."""
    ones_array = ak.ones(100)

    # pdarray on the left
    array_plus_scalar = ones_array + 1
    assert isinstance(array_plus_scalar, ak.pdarray)
    assert np.float64(2) == array_plus_scalar[0]

    # pdarray on the right
    scalar_plus_array = 1 + ones_array
    assert isinstance(scalar_plus_array, ak.pdarray)
    assert np.float64(2) == scalar_plus_array[0]
Beispiel #22
0
def testPdArrayAddNumpyInt():
    """Addition of a ones pdarray and a numpy int64, in both operand orders."""
    ones_array = ak.ones(100)
    one = np.int64(1)

    # pdarray on the left
    array_plus_scalar = ones_array + one
    assert isinstance(array_plus_scalar, ak.pdarray)
    assert np.float64(2) == array_plus_scalar[0]

    # pdarray on the right
    scalar_plus_array = one + ones_array
    assert isinstance(scalar_plus_array, ak.pdarray)
    assert np.float64(2) == scalar_plus_array[0]
Beispiel #23
0
def gen_rmat_edges(lgNv, Ne_per_v, p, perm=False):
    """Generate RMAT-style edges as a pair of co-sorted vertex arrays.

    Parameters
    ----------
    lgNv : int
        Base-2 logarithm of the number of vertices.
    Ne_per_v : int
        Number of edges per vertex; the edge count is ``Ne_per_v * 2**lgNv``.
    p : float
        Probability ``a``; the remaining probability is split evenly
        between ``b``, ``c`` and ``d``.
    perm : bool
        If True, rename the vertices through a random permutation after
        sorting.

    Returns
    -------
    (ii, jj) : tuple of pdarray
        First and second vertex of every edge.
    """
    num_vertices = 2**lgNv
    num_edges = Ne_per_v * num_vertices
    # quadrant probabilities
    a = p
    b = c = d = (1.0 - a) / 3.0
    # both endpoint arrays start as all ones
    src = ak.ones(num_edges, dtype=ak.int64)
    dst = ak.ones(num_edges, dtype=ak.int64)
    # quantities reused on every pass of the generation loop
    ab = a + b
    c_norm = c / (c + d)
    a_norm = a / (a + b)
    # each pass randomly adds one power-of-two contribution to every endpoint
    for ib in range(1, lgNv):
        weight = 2**(ib - 1)
        src_bit = ak.randint(0, 1, num_edges, dtype=ak.float64) > ab
        dst_bit = (ak.randint(0, 1, num_edges, dtype=ak.float64) >
                   (c_norm * src_bit + a_norm * (~src_bit)))
        src = src + (weight * src_bit)
        dst = dst + (weight * dst_bit)
    # sort the edges on both endpoints at once and apply that order
    order = ak.coargsort((src, dst))
    src = src[order]
    dst = dst[order]
    if perm:
        # argsort of random floats yields a random permutation used as the
        # new vertex numbering
        relabel = ak.argsort(ak.randint(0, 1, num_vertices, dtype=ak.float64))
        src = relabel[src]
        dst = relabel[dst]
    #
    # maybe: remove edges which are self-loops???
    #
    return (src, dst)
Beispiel #24
0
def check_bool(N):
    """Check truth-value testing of arkouda arrays.

    The check passes when ``a and b`` on two ``N``-element arrays raises
    ValueError and ``d and 5`` on a one-element array evaluates without
    error to a truthy value. The outcome is reported through ``pass_fail``.
    """
    a = ak.arange(N)
    b = ak.ones(N)
    try:
        _ = a and b
    except ValueError:
        correct = True
    except Exception:
        # Any other error type is the wrong failure mode.
        correct = False
    else:
        # No exception at all: the expected ValueError was not raised.
        # Previously this path left `correct` unbound.
        correct = False
    d = ak.array([1])
    correct = correct and (d and 5)
    return pass_fail(correct)
Beispiel #25
0
def check_set_slice(N):
    """Check strided slice assignment on an arkouda array against numpy.

    Every second element of an all-ones array is negated in both libraries;
    the numpy result is transferred to arkouda for the comparison.
    """
    # numpy reference, moved to the server afterwards
    reference = np.ones(N)
    reference[::2] = reference[::2] * -1
    expected = ak.array(reference)
    # same operation through arkouda
    actual = ak.ones(N)
    actual[::2] = actual[::2] * -1
    elementwise_equal = expected == actual
    return pass_fail(elementwise_equal.all())
Beispiel #26
0
def time_ak_gather(isize, vsize, trials, dtype, random):
    """Time the gather ``v[i]`` on arkouda arrays, printing diagnostics and results.

    Parameters
    ----------
    isize : int
        Number of indices per locale; multiplied by the server's ``numLocales``.
    vsize : int
        Number of source values per locale; multiplied by ``numLocales``.
    trials : int
        Number of timed repetitions that are averaged.
    dtype : str
        'int64', 'float64' or 'str' when ``random`` is set. Without
        ``random``, 'str' gives random strings and anything else is passed
        to ``ak.ones``.
    random : bool
        If true, gather from random values.

    Raises
    ------
    ValueError
        If ``random`` is true and ``dtype`` is not one of the handled names.
    """
    print(">>> arkouda gather")
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni)
    if random:
        if dtype == 'int64':
            v = ak.randint(0, 2**32, Nv)
        elif dtype == 'float64':
            v = ak.randint(0, 1, Nv, dtype=ak.float64)
        elif dtype == 'str':
            v = ak.random_strings_uniform(1, 16, Nv)
        else:
            # Fail here with a clear message instead of an UnboundLocalError
            # on `v` below.
            raise ValueError("unsupported dtype for random input: {}".format(dtype))
    else:
        if dtype == 'str':
            v = ak.random_strings_uniform(1, 16, Nv)
        else:
            v = ak.ones(Nv, dtype=dtype)
    # The offsets/bytes diagnostics only apply to string arrays; printing
    # them unconditionally made every non-'str' run fail.
    is_str = dtype == 'str'
    print("v={}".format(v))
    if is_str:
        print("v.offsets={}".format(v.offsets))
        print("v.nbytes={}".format(v.nbytes))
    print("v[1]={}".format(v[1]))
    print("In Gather size={}".format(v.size))
    if is_str:
        print("In Gather nbytes={}".format(v.nbytes))
    print("In Gather ndim={}".format(v.ndim))
    print("In Gather shape={}".format(v.shape))
    if is_str:
        print("In Gather offsets name ={}".format(v.offsets.name))
        print("In Gather offsets size={}".format(v.offsets.size))
        print("In Gather bytes name ={}".format(v.bytes.name))
        print("In Gather bytes size={}".format(v.bytes.size))
    timings = []
    for _ in range(trials):
        print("In Gather loop i={}".format(i))
        # NOTE: this diagnostic performs an extra, untimed gather.
        print("In Gather v[i]={}".format(v[i]))
        start = time.time()
        c = v[i]
        end = time.time()
        print("In Gather loop c={}".format(c))
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if is_str:
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30))
Beispiel #27
0
 def select_clusters(self):
     """Mark clusters as selected by comparing each parent's stability with its children's.

     All entries start out selected. For every unique parent key except the
     first, the summed stability of its children is compared with its own:
     if the children's sum is at least as large, the parent takes that sum
     and is deselected; otherwise ``deselect_children`` is called for it.
     """
     print("Computing Selection and Stability.")
     # Perhaps keep track of a "final clusters" array, that we update as we
     # work through this function.
     self.selection_data['selected'] = ak.ones(self.selection_data.size, dtype=ak.bool)
     parent_keys = ak.GroupBy(self.selection_data['parent']).unique_keys
     # the first unique key is skipped
     for parent in tqdm(parent_keys[1:]):
         has_this_parent = self.selection_data['parent'] == parent
         child_ids = self.selection_data['index'][has_this_parent]
         children_stability = (self.selection_data['stability'][child_ids]).sum()
         parent_stability = self.selection_data['stability'][parent]
         if children_stability >= parent_stability:
             self.selection_data['stability'][parent] = children_stability
             self.selection_data['selected'][parent] = False
         else:
             self.deselect_children(node=parent)
     print("Selection and Stability computation is complete!")
Beispiel #28
0
def time_ak_gather(isize, vsize, trials, dtype, random, seed):
    """Time the gather ``v[i]`` on arkouda arrays and print the results.

    Parameters
    ----------
    isize : int
        Number of indices per locale; multiplied by the server's ``numLocales``.
    vsize : int
        Number of source values per locale; multiplied by ``numLocales``.
    trials : int
        Number of timed repetitions that are averaged.
    dtype : str
        'int64', 'float64', 'bool' or 'str' for random values. Otherwise
        'str' gives stringified integers and anything else is passed to
        ``ak.ones``.
    random : bool
        If true, gather from random values.
    seed : int or None
        Seed for the index vector; ``seed + 1`` is used for the values.
        A non-None seed also selects random values.

    Raises
    ------
    ValueError
        If random values are requested and ``dtype`` is not one of the
        handled names.
    """
    print(">>> arkouda {} gather".format(dtype))
    cfg = ak.get_config()
    Ni = isize * cfg["numLocales"]
    Nv = vsize * cfg["numLocales"]
    print("numLocales = {}, num_indices = {:,} ; num_values = {:,}".format(cfg["numLocales"], Ni, Nv))
    # Index vector is always random
    i = ak.randint(0, Nv, Ni, seed=seed)
    # Use a different seed for the values than for the indices.
    if seed is not None:
        seed += 1
    if random or seed is not None:
        if dtype == 'int64':
            v = ak.randint(0, 2**32, Nv, seed=seed)
        elif dtype == 'float64':
            v = ak.randint(0, 1, Nv, dtype=ak.float64, seed=seed)
        elif dtype == 'bool':
            v = ak.randint(0, 1, Nv, dtype=ak.bool, seed=seed)
        elif dtype == 'str':
            v = ak.random_strings_uniform(1, 16, Nv, seed=seed)
        else:
            # Fail here with a clear message instead of an UnboundLocalError
            # on `v` inside the timing loop.
            raise ValueError("unsupported dtype for random input: {}".format(dtype))
    else:
        if dtype == 'str':
            v = ak.cast(ak.arange(Nv), 'str')
        else:
            v = ak.ones(Nv, dtype=dtype)

    timings = []
    for _ in range(trials):
        start = time.time()
        c = v[i]
        end = time.time()
        timings.append(end - start)
    tavg = sum(timings) / trials

    print("Average time = {:.4f} sec".format(tavg))
    if dtype == 'str':
        offsets_transferred = 3 * c.offsets.size * c.offsets.itemsize
        bytes_transferred = (c.offsets.size * c.offsets.itemsize) + (2 * c.bytes.size)
        bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg
    else:
        bytes_per_sec = (c.size * c.itemsize * 3) / tavg
    print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30))
Beispiel #29
0
#!/usr/bin/env python3

import importlib
import numpy as np
import math
import gc
import sys

import arkouda as ak

ak.verbose = False
# Optional command line: <server> [<port>]. The port is only read when it
# was actually supplied; a lone server argument used to raise IndexError.
if len(sys.argv) > 2:
    ak.connect(server=sys.argv[1], port=sys.argv[2])
elif len(sys.argv) > 1:
    ak.connect(server=sys.argv[1])
else:
    ak.connect()

# Two large all-ones int64 arrays as operands.
N = 10**9
a = ak.ones(N, dtype='int64')
b = ak.ones(N, dtype='int64')
print(a, b)

# Element-wise sum and difference.
c = a + b
d = a - b
print(c)
print(d)

ak.shutdown()
Beispiel #30
0
import gc
import sys

import arkouda as ak

print(">>> Sanity checks on the arkouda_server")

ak.verbose = False
# Optional command line: <server> [<port>]. The port is only read when it
# was actually supplied; a lone server argument used to raise IndexError.
if len(sys.argv) > 2:
    ak.connect(server=sys.argv[1], port=sys.argv[2])
elif len(sys.argv) > 1:
    ak.connect(server=sys.argv[1])
else:
    ak.connect()

N = 1000

# ak.int64 is used instead of np.int64 so that the script does not need
# numpy, which the visible imports do not provide.
a1 = ak.ones(N,dtype=ak.int64)
a2 = ak.arange(0,N,1)
t1 = a1
t2 = a1 * 10
dt = 10

# should get N*N answers
I,J = ak.join_on_eq_with_dt(a1,a1,a1,a1,dt,"true_dt",result_limit=N*N)
print(I,J)
if (I.size == N*N) and (J.size == N*N):
    print("passed!")
else:
    print("failed!")

# should get N answers
I,J = ak.join_on_eq_with_dt(a2,a1,t1,t2,dt,"true_dt")