Exemple #1
0
 def test_dtypes(self):
     """
     Check that popcount rejects non-integer dtypes (float64 and bool)
     with a TypeError, via both the method and the module-level function.
     """
     f = ak.zeros(10, dtype=ak.float64)
     with self.assertRaises(TypeError):
         f.popcount()
     b = ak.zeros(10, dtype=ak.bool)
     # Pass the bool array here: the previous version passed ``f`` again,
     # which left ``b`` unused and the bool dtype untested.
     with self.assertRaises(TypeError):
         ak.popcount(b)
    def test_zeros(self):
        """
        Exercise ak.zeros: result type, dtype round-tripping for the Python
        and arkouda dtype spellings, a size given as a string, and dtypes
        that are expected to be rejected with TypeError.
        """
        int_arr = ak.zeros(5, dtype=ak.int64)
        self.assertIsInstance(int_arr, ak.pdarray)
        self.assertEqual(ak.int64, int_arr.dtype)

        # The dtype reported by the array must equal the dtype requested
        for requested in (float, ak.float64, bool, ak.bool):
            self.assertEqual(requested, ak.zeros(5, dtype=requested).dtype)

        # A size passed as the string '5' yields an array of length 5
        from_text = ak.zeros('5')
        self.assertEqual(5, len(from_text))

        for rejected in (ak.uint8, str):
            with self.assertRaises(TypeError):
                ak.zeros(5, dtype=rejected)
Exemple #3
0
def run_test_pdarray_index(strings, test_strings, cat):
    """
    Check indexing of ``strings`` and ``cat`` with integer and boolean
    pdarrays against the reference ``test_strings``.

    ``test_strings`` is indexed with the ``to_ndarray()`` form of each
    index (presumably it is a numpy array -- confirm against the caller).
    The singleton checks use the module-level ``N`` to pick the midpoint.
    """
    # Integer indexing: every 10th element
    stride = ak.arange(0, strings.size, 10)
    for indexable in (strings, cat):
        assert compare_strings(indexable[stride].to_ndarray(),
                               test_strings[stride.to_ndarray()])

    # Boolean indexing with the same positions switched on
    mask = ak.zeros(strings.size, dtype=ak.bool)
    mask[stride] = True
    assert compare_strings(strings[mask].to_ndarray(),
                           test_strings[mask.to_ndarray()])

    # A one-element integer pdarray must give a Strings array, not a scalar
    mid = N // 2
    picked = strings[ak.array([mid])]
    assert isinstance(picked, ak.Strings) and picked.size == 1
    assert picked[0] == strings[mid]

    # An all-False mask must give an empty Strings array
    one_hot = ak.zeros(strings.size, dtype=ak.bool)
    picked = strings[one_hot]
    assert isinstance(picked, ak.Strings) and picked.size == 0

    # A mask with a single True must give a one-element Strings array
    one_hot[mid] = True
    picked = strings[one_hot]
    assert isinstance(picked, ak.Strings) and picked.size == 1
    assert picked[0] == strings[mid]
Exemple #4
0
def compare_strategies(length, ncat, op, dtype):
    """
    Time ak.GroupBy construction and aggregation twice, once with ``False``
    and once with ``True`` as the second GroupBy argument (labelled "Global"
    and "Local" in the output), then report whether the results agree.

    Values are random ints for ``dtype == 'int64'``, a mostly-False boolean
    array for ``dtype == 'bool'``, and a linspace over [-1, 1] otherwise.

    Returns the four elapsed times in seconds:
    (global groupby, global reduce, local groupby, local reduce).
    """
    def _timed(label, action):
        # Print the label, run the action, print and hand back the elapsed time
        print(label, end=' ')
        began = time()
        outcome = action()
        elapsed = time() - began
        print(elapsed)
        return outcome, elapsed

    keys = ak.randint(0, ncat, length)
    if dtype == 'int64':
        vals = ak.randint(0, length//ncat, length)
    elif dtype == 'bool':
        vals = ak.zeros(length, dtype='bool')
        # Switch on ncat//2 randomly chosen positions (repeats possible)
        for position in np.random.randint(0, length, ncat//2):
            vals[position] = True
    else:
        vals = ak.linspace(-1, 1, length)

    gg, ggtime = _timed("Global groupby", lambda: ak.GroupBy(keys, False))
    (gk, gv), grtime = _timed("Global reduce", lambda: gg.aggregate(vals, op))
    lg, lgtime = _timed("Local groupby", lambda: ak.GroupBy(keys, True))
    (lk, lv), lrtime = _timed("Local reduce", lambda: lg.aggregate(vals, op))

    print(f"Keys match? {(gk == lk).all()}")
    print(f"Absolute diff of vals = {ak.abs(gv - lv).sum()}")
    return ggtime, grtime, lgtime, lrtime
    def testSaveAndLoadCategoricalMulti(self):
        """
        Round-trip a pseudo dataframe (two categoricals, one pdarray and one
        strings object) through ak.save_all / ak.load_all (HDF5) and check
        that four entries come back with the same contents.
        """
        def as_list(arr):
            # Pull the array to the client as a plain Python list
            return arr.to_ndarray().tolist()

        c1 = self._getCategorical(prefix="c1", size=51)
        c2 = self._getCategorical(prefix="c2", size=52)
        pda1 = ak.zeros(51)
        strings1 = ak.random_strings_uniform(9, 10, 52)

        with tempfile.TemporaryDirectory(
                dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
            df = {"cat1": c1, "cat2": c2, "pda1": pda1, "strings1": strings1}
            ak.save_all(df, f"{tmp_dirname}/cat-save-test")
            x = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
            self.assertTrue(len(x.items()) == 4)
            # assertCountEqual compares elements and their multiplicities,
            # ignoring order
            for key, saved in (("cat1", c1), ("cat2", c2)):
                self.assertCountEqual(as_list(x[key].categories),
                                      as_list(saved.categories))
            for key, saved in (("pda1", pda1), ("strings1", strings1)):
                self.assertCountEqual(as_list(x[key]), as_list(saved))
Exemple #6
0
 def test_zero_length_groupby(self):
     """
     Boundary case for GroupBy on a zero-length pdarray (see Issue #900).
     """
     empty_keys = ak.zeros(0, dtype=ak.int64)
     grouping = ak.GroupBy(empty_keys)
     # Rendering the segments is the passing condition: the test succeeds
     # as long as this does not raise.
     str(grouping.segments)
Exemple #7
0
def check_zeros(N):
    """
    Compare np.zeros(N) with ak.zeros(N) element by element and return
    whatever pass_fail reports for the combined result.
    """
    expected = np.zeros(N)
    actual = ak.zeros(N).to_ndarray()
    return pass_fail((expected == actual).all())
Exemple #8
0
def generate_arrays(length, nkeys, nvals, dtype='int64'):
    """
    Build a (keys, vals) pair of arkouda arrays of the given length.

    keys are random ints in [0, nkeys). vals depend on ``dtype``:
    'int64' gives random ints in [0, nvals), 'bool' gives an all-False
    array with nkeys // 2 randomly chosen positions set to True, and
    anything else gives a linspace over [-1, 1].
    """
    keys = ak.randint(0, nkeys, length)
    if dtype == 'int64':
        return keys, ak.randint(0, nvals, length)
    if dtype == 'bool':
        flags = ak.zeros(length, dtype='bool')
        # Positions may repeat, so fewer than nkeys // 2 may end up True
        for position in np.random.randint(0, length, nkeys // 2):
            flags[position] = True
        return keys, flags
    return keys, ak.linspace(-1, 1, length)
Exemple #9
0
def run_test_in1d(strings, cat, base_words):
    """
    Check ak.in1d on ``strings`` and ``cat`` against 100 words drawn from
    ``base_words`` (indices taken from [0, UNIQUE), a module-level constant).

    Verifies that both inputs give the same mask, that every matched word
    is in the target set, and that a brute-force equality scan finds the
    same matches.
    """
    picks = ak.randint(0, UNIQUE, 100)
    akwords = base_words[picks]
    targets = akwords.to_ndarray()

    string_hits = ak.in1d(strings, akwords)
    cat_hits = ak.in1d(cat, akwords)
    assert (string_hits == cat_hits).all()

    # Every matched word must belong to the target set
    assert all(found in targets for found in strings[string_hits].to_ndarray())

    # Brute force: OR together an equality mask per target word
    brute = ak.zeros(strings.size, dtype=ak.bool)
    for target in targets:
        brute |= (strings == target)
    assert (brute == string_hits).all()
    def test_eros_like(self):
        """
        Check that ak.zeros_like returns a pdarray whose dtype matches its
        int64, float64 and bool inputs.

        The float64 and bool sections previously called ak.ones/ak.ones_like,
        so zeros_like was only exercised for int64.
        NOTE(review): the method name looks like a typo for
        ``test_zeros_like``; it is left unchanged here so that existing
        test selections keep working.
        """
        intZeros = ak.zeros(5, dtype=ak.int64)
        intZerosLike = ak.zeros_like(intZeros)

        self.assertIsInstance(intZerosLike, ak.pdarray)
        self.assertEqual(ak.int64, intZerosLike.dtype)

        floatZeros = ak.zeros(5, dtype=ak.float64)
        floatZerosLike = ak.zeros_like(floatZeros)

        self.assertEqual(ak.float64, floatZerosLike.dtype)

        boolZeros = ak.zeros(5, dtype=ak.bool)
        boolZerosLike = ak.zeros_like(boolZeros)

        self.assertEqual(ak.bool, boolZerosLike.dtype)
Exemple #11
0
def check_float(N):
    """
    Check ak.coargsort on float64 key lists of length N.

    For each key list, the permutation it returns must leave the expected
    column sorted: the first key, or the second key when the first is the
    all-zero array.
    """
    a = ak.randint(0, 1, N, dtype=ak.float64)
    n = ak.randint(-1, 1, N, dtype=ak.float64)
    z = ak.zeros(N, dtype=ak.float64)

    # (keys handed to coargsort, column that must come out sorted)
    cases = (
        ([a], a),
        ([a, n], a),
        ([n, a], n),
        ([z, a], a),
        ([z, n], n),
    )
    for keys, expected_sorted in cases:
        perm = ak.coargsort(keys)
        assert ak.is_sorted(expected_sorted[perm])
Exemple #12
0
def check_int(N):
    """
    Check ak.coargsort on integer key lists of length N whose values are
    drawn from ranges bounded by 2**16, 2**32 and 2**64, including ranges
    with negative lower bounds.

    For each key list, the permutation it returns must leave the expected
    column sorted: the first key that is not the all-zero array.
    """
    from itertools import permutations

    def _verify(cases):
        # Each case is (keys handed to coargsort, column that must be sorted)
        for keys, expected_sorted in cases:
            perm = ak.coargsort(keys)
            assert ak.is_sorted(expected_sorted[perm])

    z = ak.zeros(N, dtype=ak.int64)

    # Values bounded by 2**16
    a2 = ak.randint(0, 2**16, N)
    b2 = ak.randint(0, 2**16, N)
    c2 = ak.randint(0, 2**16, N)
    d2 = ak.randint(0, 2**16, N)
    n2 = ak.randint(-(2**15), 2**15, N)
    _verify((
        ([a2], a2),
        ([n2], n2),
        ([a2, b2, c2, d2], a2),
        ([z, b2, c2, d2], b2),
        ([z, z, c2, d2], c2),
        ([z, z, z, d2], d2),
    ))

    # Values bounded by 2**32
    a4 = ak.randint(0, 2**32, N)
    b4 = ak.randint(0, 2**32, N)
    n4 = ak.randint(-(2**31), 2**31, N)
    _verify((
        ([a4], a4),
        ([n4], n4),
        ([a4, b4], a4),
        ([b4, a4], b4),
    ))

    # Values bounded by 2**64
    a8 = ak.randint(0, 2**64, N)
    b8 = ak.randint(0, 2**64, N)
    n8 = ak.randint(-(2**63), 2**64, N)
    _verify((
        ([a8], a8),
        ([n8], n8),
        ([b8, a8], b8),
    ))

    # Mixed ranges: every ordering of the three "a" arrays
    for ordering in permutations([a2, a4, a8]):
        perm = ak.coargsort(ordering)
        assert ak.is_sorted(ordering[0][perm])
Exemple #13
0
    print("pdarray bool index passed")

    # in1d and iter
    # more_words = np.random.choice(base_words, 100)
    # akwords = ak.array(more_words)
    more_choices = ak.randint(0, UNIQUE, 100)
    akwords = base_words[more_choices]
    more_words = akwords.to_ndarray()
    matches = ak.in1d(strings, akwords)
    catmatches = ak.in1d(cat, akwords)
    assert ((matches == catmatches).all())
    # Every word in matches should be in the target set
    for word in strings[matches]:
        assert (word in more_words)
    # Exhaustively find all matches to make sure we didn't miss any
    inds = ak.zeros(strings.size, dtype=ak.bool)
    for word in more_words:
        inds |= (strings == word)
    assert ((inds == matches).all())
    print("in1d and iter passed")

    # argsort
    test_argsort(strings, test_strings, cat)

    # unique
    test_unique(strings, test_strings, cat)

    # groupby
    g = ak.GroupBy(strings)
    gc = ak.GroupBy(cat)
    # Unique keys should be same result as ak.unique
Exemple #14
0
 def test_compare_zeros(self):
     """
     ak.zeros(N) must match np.zeros(N) in shape and in every element
     (``N`` is the module-level size).
     """
     # create np version
     nArray = np.zeros(N)
     # create ak version
     aArray = ak.zeros(N)
     # Compare the arrays themselves. Comparing ``.all()`` of each side
     # only compared two booleans, so any pair of arrays that each held
     # at least one zero would pass.
     self.assertTrue(np.array_equal(nArray, aArray.to_ndarray()))