Esempio n. 1
0
 def test_pdarrays_datatypes(self):
     """Check the dtype reported for ak.ones() and for an array of strings."""
     expected_float = dtypes.dtype('float64')
     self.assertEqual(expected_float, ak.ones(10).dtype)

     expected_str = dtypes.dtype('str')
     words = [f'string {i}' for i in range(10)]
     self.assertEqual(expected_str, ak.array(words).dtype)
Esempio n. 2
0
    else:
        ak.connect()

    # with open(__file__, 'r') as f:
    #     base_words = np.array(f.read().split())
    # test_strings = np.random.choice(base_words, N, replace=True)
    # strings = ak.array(test_strings)

    print("===============main=============================")
    # Build two pools of random strings and echo each one so that a failing
    # run can be diagnosed from its output.
    base_words1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
    print("base_words1=")
    print(str(base_words1))
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE, characters='printable')
    print("base_words2=")
    print(str(base_words2))
    # NOTE(review): gremlins is not used in the visible part of this function;
    # presumably it is consumed further down -- confirm before removing.
    gremlins = ak.array(['"', ' ', ''])
    base_words = ak.concatenate((base_words1, base_words2))
    print("base_words=")
    print(str(base_words))
    # Cross-check the arkouda concatenation against numpy's hstack.
    np_base_words = np.hstack((base_words1.to_ndarray(), base_words2.to_ndarray()))
    assert(compare_strings(base_words.to_ndarray(), np_base_words))
    # Draw N indices into the combined pool to form the test data.
    choices = ak.randint(0, base_words.size, N)
    strings = base_words[choices]
    test_strings = strings.to_ndarray()
    cat = ak.Categorical(strings)
    print("strings =", strings)
    print("categorical =", cat)
    print("Generation and concatenate passed")
  
    # int index
    print("")
Esempio n. 3
0
    def testArrayCreation(self):
        """Exercise ak.array with accepted and rejected kinds of input.

        Accepted inputs (numpy array, list, range, deque) must yield a
        pdarray of the expected length and dtype; the rejected inputs must
        raise with the exact messages asserted below.
        """
        def check_created(source, size, kind):
            # Shared assertions for every input that ak.array should accept.
            pda = ak.array(source)
            self.assertIsInstance(pda, ak.pdarray)
            self.assertEqual(size, len(pda))
            self.assertEqual(kind, pda.dtype)

        check_created(np.ones(100), 100, float)
        check_created(list(range(0, 100)), 100, int)
        check_created(range(5), 5, int)
        check_created(deque(range(5)), 5, int)

        rank_message = "Only rank-1 pdarrays or ndarrays supported"
        # Each attempt is a callable so that its argument is only built
        # inside the assertRaises context.
        rejected_attempts = (
            lambda: ak.array({range(0, 100)}),
            lambda: ak.array(np.array([[0, 1], [0, 1]])),
            lambda: ak.array('not an iterable'),
        )
        for attempt in rejected_attempts:
            with self.assertRaises(RuntimeError) as caught:
                attempt()
            self.assertEqual(rank_message, caught.exception.args[0])

        # list(0) fails while the argument is being built, before ak.array
        # is ever called.
        with self.assertRaises(TypeError) as caught:
            ak.array(list(list(0)))
        self.assertEqual("'int' object is not iterable",
                         caught.exception.args[0])
Esempio n. 4
0
    def test_mulitdimensional_array_creation(self):
        """A nested (two-level) list passed to ak.array must raise RuntimeError."""
        nested = [[0, 0], [0, 1], [1, 1]]
        with self.assertRaises(RuntimeError) as caught:
            ak.array(nested)

        message = caught.exception.args[0]
        self.assertEqual('Only rank-1 pdarrays or ndarrays supported', message)
Esempio n. 5
0
 def _getRandomizedCategorical(self) -> ak.Categorical:
     """Return a Categorical over a fixed ten-element list with repeated labels."""
     labels = [
         'string', 'string1', 'non-string', 'non-string2', 'string',
         'non-string', 'string3', 'non-string2', 'string', 'non-string',
     ]
     return ak.Categorical(ak.array(labels))
Esempio n. 6
0
 def testGroup(self):
     """Compare group() on the fixture Categorical with a fixed permutation."""
     group = self._getRandomizedCategorical().group()
     expected = ak.array([2, 5, 9, 6, 1, 3, 7, 0, 4, 8])
     matches = expected == group
     self.assertTrue(matches.all())
Esempio n. 7
0
 def _getCategorical(self,
                     prefix: str = 'string',
                     size: int = 11) -> ak.Categorical:
     """Return a Categorical of '<prefix> <i>' labels for i in 1..size-1."""
     labels = [f'{prefix} {i}' for i in range(1, size)]
     return ak.Categorical(ak.array(labels))
Esempio n. 8
0
 def setUp(self):
     """Run the base-class setup, then create the arrays shared by the tests."""
     ArkoudaTest.setUp(self)
     self.a = ak.arange(10)
     # Smallest and largest values representable as a signed 64-bit integer,
     # with -1 in between.
     lowest = -(2**63)
     highest = 2**63 - 1
     self.edgeCases = ak.array([lowest, -1, highest])
Esempio n. 9
0
    def test_error_handling(self):
        """Check the TypeError messages of GroupBy and its reductions.

        Every case is a zero-argument callable paired with the message its
        TypeError must carry. The callables are invoked one at a time inside
        assertRaises, so their arguments are only built within that context.
        """
        arrays = make_arrays()
        akdf = {name: ak.array(values) for name, values in arrays.items()}
        gb = ak.GroupBy([akdf['keys'], akdf['keys2']])

        groupby_msg = 'GroupBy only supports pdarrays with a dtype int64'
        broadcast_msg = ('type of argument "values" must be '
                         'arkouda.pdarrayclass.pdarray; got list instead')
        nunique_msg = 'the pdarray dtype must be int64'
        any_msg = 'any is only supported for pdarrays of dtype bool'
        all_msg = 'all is only supported for pdarrays of dtype bool'
        min_msg = 'min is only supported for pdarrays of dtype float64 and int64'
        max_msg = 'max is only supported for pdarrays of dtype float64 and int64'
        argmin_msg = 'argmin is only supported for pdarrays of dtype float64 and int64'
        argmax_msg = 'argmax is only supported for pdarrays of dtype float64 and int64'

        cases = (
            (lambda: ak.GroupBy(self.bvalues), groupby_msg),
            (lambda: ak.GroupBy(self.fvalues), groupby_msg),
            (lambda: gb.broadcast([]), broadcast_msg),
            (lambda: self.igb.nunique(ak.randint(0, 1, 10, dtype=bool)),
             nunique_msg),
            (lambda: self.igb.nunique(ak.randint(0, 1, 10, dtype=float64)),
             nunique_msg),
            (lambda: self.igb.any(ak.randint(0, 1, 10, dtype=float64)),
             any_msg),
            (lambda: self.igb.any(ak.randint(0, 1, 10, dtype=int64)),
             any_msg),
            (lambda: self.igb.all(ak.randint(0, 1, 10, dtype=float64)),
             all_msg),
            (lambda: self.igb.all(ak.randint(0, 1, 10, dtype=int64)),
             all_msg),
            (lambda: self.igb.min(ak.randint(0, 1, 10, dtype=bool)),
             min_msg),
            (lambda: self.igb.max(ak.randint(0, 1, 10, dtype=bool)),
             max_msg),
            (lambda: self.igb.argmin(ak.randint(0, 1, 10, dtype=bool)),
             argmin_msg),
            (lambda: self.igb.argmax(ak.randint(0, 1, 10, dtype=bool)),
             argmax_msg),
        )

        for trigger, expected_message in cases:
            with self.assertRaises(TypeError) as caught:
                trigger()
            self.assertEqual(expected_message, caught.exception.args[0])
Esempio n. 10
0
def run_test(levels, verbose=False):
    '''
    Run ak.GroupBy.count() and every reduction in ak.GroupBy.Reductions on the
    arrays returned by make_arrays(), comparing each arkouda result with the
    pandas result for the same grouping, and print a one-line summary.

    :param levels: number of grouping keys; 1 groups by 'keys', 2 groups by
        'keys' and 'keys2'
    :param verbose: when True, print a progress line for each operation
    :return: the accumulated failure count
    :raise ValueError: if levels is neither 1 nor 2
    '''
    # Reject unsupported values up front; otherwise akg/keyname would be left
    # unbound and the failure would only surface later as a NameError.
    if levels not in (1, 2):
        raise ValueError(
            'levels must be 1 or 2, got {!r}'.format(levels))

    d = make_arrays()
    df = pd.DataFrame(d)
    akdf = {k: ak.array(v) for k, v in d.items()}

    if levels == 1:
        akg = ak.GroupBy(akdf['keys'])
        keyname = 'keys'
    else:
        akg = ak.GroupBy([akdf['keys'], akdf['keys2']])
        keyname = ['keys', 'keys2']

    tests = 0
    failures = 0
    not_impl = 0

    if verbose:
        print("Doing .count()")
    tests += 1
    pdkeys, pdvals = groupby_to_arrays(df, keyname, 'int64', 'count', levels)
    akkeys, akvals = akg.count()
    akvals = akvals.to_ndarray()
    failures += compare_keys(pdkeys, akkeys, levels, pdvals, akvals)

    for vname in ('int64', 'float64', 'bool'):
        for op in ak.GroupBy.Reductions:
            if verbose:
                print(f"\nDoing aggregate({vname}, {op})")
            tests += 1

            # Deliberately broad: any pandas failure just means there is no
            # reference result to compare against for this combination.
            pandas_ok = True
            try:
                pdkeys, pdvals = groupby_to_arrays(df, keyname, vname, op,
                                                   levels)
            except Exception:
                if verbose:
                    print("Pandas does not implement")
                pandas_ok = False

            # The arkouda call still runs when pandas failed, so that
            # combinations arkouda rejects are counted as not implemented.
            try:
                akkeys, akvals = akg.aggregate(akdf[vname], op)
                akvals = akvals.to_ndarray()
            except RuntimeError as E:
                if verbose:
                    print("Arkouda error: ", E)
                not_impl += 1
                continue

            if not pandas_ok:
                continue

            if op.startswith('arg'):
                # For argmin/argmax the values found at the returned
                # positions are compared, not the positions themselves.
                pdextrema = df[vname][pdvals]
                akextrema = akdf[vname][ak.array(akvals)].to_ndarray()
                if not np.allclose(pdextrema, akextrema):
                    print(
                        "Different argmin/argmax: Arkouda failed to find an extremum"
                    )
                    print("pd: ", pdextrema)
                    print("ak: ", akextrema)
                    failures += 1
            else:
                failures += compare_keys(pdkeys, akkeys, levels, pdvals,
                                         akvals)

    print(
        f"{tests - failures - not_impl} / {tests - not_impl} passed, {failures} errors, {not_impl} not implemented"
    )
    return failures
Esempio n. 11
0
 def testUnique(self):
     """Compare unique() on the fixture Categorical with a fixed label list."""
     cat = self._getRandomizedCategorical()

     expected_labels = ['non-string', 'string3', 'string1',
                        'non-string2', 'string']
     expected = ak.Categorical(ak.array(expected_labels)).to_ndarray()
     actual = cat.unique().to_ndarray()
     self.assertTrue((expected == actual).all())