Beispiel #1
0
 def testSeededRNG(self):
     """
     Check every random generator twice: two unseeded draws must not be
     element-wise identical, and two draws with the same seed must be.
     """
     N = 100
     seed = 8675309

     def check(draw):
         # Make sure unseeded runs differ
         first = draw()
         second = draw()
         self.assertFalse((first == second).all())
         # Make sure seeded results are same
         first = draw(seed=seed)
         second = draw(seed=seed)
         self.assertTrue((first == second).all())

     # randint for each numeric dtype
     for dt in (ak.int64, ak.float64, ak.bool):
         check(lambda dt=dt, **kw: ak.randint(0, 2**32, N, dtype=dt, **kw))

     # Uniform
     check(lambda **kw: ak.uniform(N, **kw))
     # Standard Normal
     check(lambda **kw: ak.standard_normal(N, **kw))
     # Strings (uniformly distributed length)
     check(lambda **kw: ak.random_strings_uniform(1, 10, N, **kw))
     # Strings (log-normally distributed length)
     check(lambda **kw: ak.random_strings_lognormal(2, 1, N, **kw))
Beispiel #2
0
    def test_string_registration_suite(self):
        """
        Walk a Strings object through register, rename, clear, unregister,
        attach and unregister-by-name, checking its registration state after
        each step.

        cleanup() is called before the scenario and again afterwards; the
        trailing call sits in a ``finally`` so it also runs when an assertion
        fails part-way through.
        """
        cleanup()
        try:
            # Initial registration should set name
            keep = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
            self.assertEqual("keep_me", keep.register("keep_me").name)
            self.assertTrue(keep.is_registered(),
                            "Expected Strings object to be registered")

            # Register a second time to confirm name change
            self.assertEqual("kept", keep.register("kept").name)
            self.assertTrue(keep.is_registered(),
                            "Object should be registered with updated name")

            # Add an item to discard, confirm our registered item remains and discarded item is gone
            discard = ak.random_strings_uniform(1,
                                                10,
                                                UNIQUE,
                                                characters='printable')
            ak.clear()
            self.assertEqual("kept", keep.name)
            with self.assertRaises(
                    RuntimeError,
                    msg="discard was not registered and should be discarded"):
                str(discard)

            # Unregister, should remain usable until we clear
            keep.unregister()
            str(keep)  # Should not cause error
            self.assertFalse(keep.is_registered(),
                             "This item should no longer be registered")
            ak.clear()
            with self.assertRaises(
                    RuntimeError,
                    msg="keep was unregistered and should be cleared"):
                str(keep)  # should cause RuntimeError

            # Test attach functionality
            s1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
            self.assertTrue(
                s1.register("uut").is_registered(), "uut should be registered")
            # Drop the client-side reference so the only way back is attach()
            s1 = None
            self.assertIsNone(s1, "Reference should be cleared")
            s1 = ak.Strings.attach("uut")
            self.assertTrue(s1.is_registered(),
                            "Should have re-attached to registered object")
            # This will throw an exception if the object doesn't exist server-side
            str(s1)

            # Test the Strings unregister by name using previously registered object
            ak.Strings.unregister_strings_by_name("uut")
            self.assertFalse(s1.is_registered(),
                             "Expected object to be unregistered")
        finally:
            cleanup()
Beispiel #3
0
    def test_error_handling(self):
        """
        Call Strings methods with arguments of the wrong type or value and
        check both the exception class and the first exception argument.
        """
        left = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
        right = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')

        delimiter_msg = 'type of argument "delimiter" must be one of (bytes, str, str_); got int instead'
        substr_msg = 'type of argument "substr" must be one of (bytes, str, str_); got int instead'

        # (expected exception, offending call, expected first exception argument)
        cases = (
            (TypeError, lambda: left.lstick(right, delimiter=1), delimiter_msg),
            (TypeError, lambda: left.lstick([1], 1),
             'type of argument "other" must be arkouda.strings.Strings; got list instead'),
            (TypeError, lambda: left.startswith(1), substr_msg),
            (TypeError, lambda: left.endswith(1), substr_msg),
            (TypeError, lambda: left.contains(1), substr_msg),
            (TypeError, lambda: left.peel(1), delimiter_msg),
            (ValueError, lambda: left.peel("", -5), 'times must be >= 1'),
        )

        for expected_error, offending_call, expected_msg in cases:
            with self.assertRaises(expected_error) as cm:
                offending_call()
            self.assertEqual(expected_msg, cm.exception.args[0])
Beispiel #4
0
    def test_random_strings_uniform_with_seed(self):
        """
        Pin the exact strings produced for seed=1, first with the default
        character set and then with characters='printable'.
        """
        seeded_args = dict(minlen=1, maxlen=5, seed=1, size=10)

        # Default character set
        pda = ak.random_strings_uniform(**seeded_args)
        expected = ak.array([
            'TVKJ', 'EWAB', 'CO', 'HFMD', 'U', 'MMGT', 'N', 'WOQN', 'HZ', 'VSX'
        ])
        self.assertTrue((expected == pda).all())

        # Printable character set
        pda = ak.random_strings_uniform(characters='printable', **seeded_args)
        expected = ak.array([
            '+5"f', '-P]3', '4k', '~HFF', 'F', '`,IE', 'Y', 'jkBa', '9(', '5oZ'
        ])
        self.assertTrue((expected == pda).all())
Beispiel #5
0
 def setUp(self):
     """
     Build the string fixtures shared by the tests: a pool of random base
     words, a random sample drawn from that pool (as Strings, ndarray and
     Categorical), a delimiter chosen by _get_delimiter, and "gremlins"
     variants that append the edge-case strings '"', ' ' and ''.
     """
     self.maxDiff = None
     ArkoudaTest.setUp(self)

     uniform_words = ak.random_strings_uniform(
         1, 10, UNIQUE, characters='printable')
     lognormal_words = ak.random_strings_lognormal(
         2, 0.25, UNIQUE, characters='printable')
     raw_gremlins = np.array(['"', ' ', ''])
     self.gremlins = ak.array(raw_gremlins)

     # Same word pool, once server-side and once as a numpy array
     self.base_words = ak.concatenate((uniform_words, lognormal_words))
     client_words = (uniform_words.to_ndarray(), lognormal_words.to_ndarray())
     self.np_base_words = np.hstack(client_words)

     # Random sample (with repetition) of the pool
     picks = ak.randint(0, self.base_words.size, N)
     self.strings = self.base_words[picks]
     self.test_strings = self.strings.to_ndarray()
     self.cat = ak.Categorical(self.strings)

     # Character frequencies over all base words feed the delimiter choice
     char_counts = Counter(''.join(self.base_words.to_ndarray()))
     chars, weights = zip(*char_counts.items())
     self.delim = self._get_delimiter(chars, weights, raw_gremlins)
     self.akset = set(ak.unique(self.strings).to_ndarray())

     # Variants with the gremlins appended at the end
     with_gremlins = (self.base_words, self.gremlins)
     self.gremlins_base_words = ak.concatenate(with_gremlins)
     sample_with_gremlins = (self.base_words[picks], self.gremlins)
     self.gremlins_strings = ak.concatenate(sample_with_gremlins)
     self.gremlins_test_strings = self.gremlins_strings.to_ndarray()
     self.gremlins_cat = ak.Categorical(self.gremlins_strings)
Beispiel #6
0
    def testSaveAndLoadCategoricalMulti(self):
        """
        Test to build a pseudo dataframe with multiple categoricals, pdarrays, strings objects and successfully
        write/read it from HDF5
        """
        c1 = self._getCategorical(prefix="c1", size=51)
        c2 = self._getCategorical(prefix="c2", size=52)
        pda1 = ak.zeros(51)
        strings1 = ak.random_strings_uniform(9, 10, 52)

        with tempfile.TemporaryDirectory(
                dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
            df = {"cat1": c1, "cat2": c2, "pda1": pda1, "strings1": strings1}
            # Build the prefix once so save and load cannot drift apart
            path_prefix = f"{tmp_dirname}/cat-save-test"
            ak.save_all(df, path_prefix)
            x = ak.load_all(path_prefix=path_prefix)
            # assertEqual reports the actual count on failure, unlike assertTrue(... == 4)
            self.assertEqual(4, len(x))
            # Note assertCountEqual asserts a and b have the same elements in the same amount regardless of order
            self.assertCountEqual(x["cat1"].categories.to_ndarray().tolist(),
                                  c1.categories.to_ndarray().tolist())
            self.assertCountEqual(x["cat2"].categories.to_ndarray().tolist(),
                                  c2.categories.to_ndarray().tolist())
            self.assertCountEqual(x["pda1"].to_ndarray().tolist(),
                                  pda1.to_ndarray().tolist())
            self.assertCountEqual(x["strings1"].to_ndarray().tolist(),
                                  strings1.to_ndarray().tolist())
Beispiel #7
0
    def test_error_handling(self):
        """
        Call Strings methods with arguments of the wrong type or value and
        check both the exception class and the first exception argument.
        """
        left = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
        right = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')

        delimiter_msg = 'Delimiter must be a string, not int'
        substr_msg = 'Substring must be a string, not int'

        # (expected exception, offending call, expected first exception argument)
        cases = (
            (TypeError, lambda: left.lstick(right, delimiter=1), delimiter_msg),
            (TypeError, lambda: left.lstick([1], 1),
             'stick: not supported between String and list'),
            (TypeError, lambda: left.startswith(1), substr_msg),
            (TypeError, lambda: left.endswith(1), substr_msg),
            (TypeError, lambda: left.contains(1), substr_msg),
            (TypeError, lambda: left.peel(1), delimiter_msg),
            (ValueError, lambda: left.peel("", -5), 'Times must be >= 1'),
        )

        for expected_error, offending_call, expected_msg in cases:
            with self.assertRaises(expected_error) as cm:
                offending_call()
            self.assertEqual(expected_msg, cm.exception.args[0])
Beispiel #8
0
 def setUp(self):
     ArkoudaTest.setUp(self)
     base_words1 = ak.random_strings_uniform(1,
                                             10,
                                             UNIQUE,
                                             characters='printable')
     base_words2 = ak.random_strings_lognormal(2,
                                               0.25,
                                               UNIQUE,
                                               characters='printable')
     base_sas1 = ak.suffix_array(base_words1)
     base_sas2 = ak.suffix_array(base_words2)
     '''
Beispiel #9
0
    def test_string_is_registered(self):
        """
        Tests the Strings.is_registered() function
        """
        words = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')

        # A freshly created object has not been registered yet
        registered = words.is_registered()
        self.assertFalse(registered)

        # Registering flips the state ...
        words.register('keep_me')
        registered = words.is_registered()
        self.assertTrue(registered)

        # ... and unregistering flips it back
        words.unregister()
        registered = words.is_registered()
        self.assertFalse(registered)

        ak.clear()
Beispiel #10
0
    def test_in_place_info(self):
        """
        Tests the class level info method for pdarray, String, and Categorical

        For each object the JSON returned by info() is parsed and the
        'registered' flag of every listed component is inspected: none may be
        set before register() and all must be set afterwards.

        cleanup() is called before the checks and again afterwards; the
        trailing call sits in a ``finally`` so it also runs when an assertion
        fails.
        """
        def registered_flags(obj):
            # Lazily yield the 'registered' flag of each component in info()
            return (sym['registered'] for sym in json.loads(obj.info()))

        cleanup()
        try:
            my_pda = ak.ones(10, ak.int64)
            self.assertFalse(
                any(registered_flags(my_pda)),
                msg=
                'no components of my_pda should be registered before register call'
            )
            my_pda.register('my_pda')
            self.assertTrue(
                all(registered_flags(my_pda)),
                msg=
                'all components of my_pda should be registered after register call'
            )

            my_str = ak.random_strings_uniform(1,
                                               10,
                                               UNIQUE,
                                               characters='printable')
            self.assertFalse(
                any(registered_flags(my_str)),
                msg=
                'no components of my_str should be registered before register call'
            )
            my_str.register('my_str')
            self.assertTrue(
                all(registered_flags(my_str)),
                msg=
                'all components of my_str should be registered after register call'
            )

            my_cat = ak.Categorical(ak.array([f"my_cat {i}"
                                              for i in range(1, 11)]))
            self.assertFalse(
                any(registered_flags(my_cat)),
                msg=
                'no components of my_cat should be registered before register call'
            )
            my_cat.register('my_cat')
            self.assertTrue(
                all(registered_flags(my_cat)),
                msg=
                'all components of my_cat should be registered after register call'
            )
        finally:
            cleanup()
Beispiel #11
0
 def setUp(self):
     """
     Build the string fixtures shared by the tests: a pool of random base
     words (server-side and as a numpy array), a random sample drawn from
     that pool as Strings, ndarray and Categorical, and a delimiter character
     drawn at random with probability proportional to how often each
     character occurs in the base words.
     """
     ArkoudaTest.setUp(self)
     base_words1 = ak.random_strings_uniform(0,
                                             10,
                                             UNIQUE,
                                             characters='printable')
     base_words2 = ak.random_strings_lognormal(2,
                                               0.25,
                                               UNIQUE,
                                               characters='printable')
     self.base_words = ak.concatenate((base_words1, base_words2))
     self.np_base_words = np.hstack(
         (base_words1.to_ndarray(), base_words2.to_ndarray()))
     choices = ak.randint(0, self.base_words.size, N)
     self.strings = self.base_words[choices]
     self.test_strings = self.strings.to_ndarray()
     self.cat = ak.Categorical(self.strings)
     # Transfer the words to the client in one call before joining them,
     # rather than iterating the server-side Strings object directly.
     # NOTE(review): some arkouda releases appear to reject direct iteration
     # of Strings outright - confirm against the installed version.
     x, w = tuple(
         zip(*Counter(''.join(self.base_words.to_ndarray())).items()))
     self.delim = np.random.choice(x, p=(np.array(w) / sum(w)))
Beispiel #12
0
    def test_string_is_registered(self):
        """
        Tests the Strings.is_registered() function
        """
        words = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')

        # Not registered -> registered -> not registered again
        registered = words.is_registered()
        self.assertFalse(registered)

        words.register('keep_me')
        registered = words.is_registered()
        self.assertTrue(registered)

        words.unregister()
        registered = words.is_registered()
        self.assertFalse(registered)

        # Now mess with one of the internal pieces to test is_registered() logic
        reregistered = words.register("uut")
        self.assertTrue(reregistered.is_registered(), "Re-register keep as uut")
        # Unregister only the "uut.bytes" piece, leaving the object half registered
        ak.unregister_pdarray_by_name("uut.bytes")
        with self.assertRaises(
                RegistrationError,
                msg="Expected RegistrationError on mis-matched pieces"):
            words.is_registered()

        ak.clear()
Beispiel #13
0
 def setUp(self):
     """
     Build the string fixtures shared by the tests: a pool of random base
     words, a random sample drawn from that pool (as Strings, ndarray and
     Categorical), a randomly chosen delimiter character weighted by how
     often each character occurs in the base words, and "gremlins" variants
     that append the edge-case strings '"', ' ' and ''.

     The fixture produces no console output; it runs before every test.
     """
     self.maxDiff = None
     ArkoudaTest.setUp(self)
     base_words1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
     base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE, characters='printable')
     gremlins = ak.array(['"', ' ', ''])
     self.gremlins = gremlins
     self.base_words = ak.concatenate((base_words1, base_words2))
     self.np_base_words = np.hstack((base_words1.to_ndarray(), base_words2.to_ndarray()))
     choices = ak.randint(0, self.base_words.size, N)
     self.strings = self.base_words[choices]
     self.test_strings = self.strings.to_ndarray()
     self.cat = ak.Categorical(self.strings)
     x, w = tuple(zip(*Counter(''.join(self.base_words.to_ndarray())).items()))
     self.delim = np.random.choice(x, p=(np.array(w) / sum(w)))
     self.akset = set(ak.unique(self.strings).to_ndarray())
     self.gremlins_base_words = ak.concatenate((base_words1, base_words2, gremlins))
     # choices only indexes the base words; the gremlins sit after them in
     # gremlins_base_words, so they are appended explicitly here.
     self.gremlins_strings = ak.concatenate((self.gremlins_base_words[choices], gremlins))
     self.gremlins_test_strings = self.gremlins_strings.to_ndarray()
     self.gremlins_cat = ak.Categorical(self.gremlins_strings)
    def test_random_strings_uniform(self):
        """
        Check the result of random_strings_uniform for Python int and
        np.int64 arguments (type, size, dtype, per-string length bounds and
        upper case), then check the errors raised for invalid arguments.
        """
        valid_argument_sets = (
            dict(minlen=1, maxlen=5, size=100),
            dict(minlen=np.int64(1), maxlen=np.int64(5), size=np.int64(100)),
        )
        for kwargs in valid_argument_sets:
            pda = ak.random_strings_uniform(**kwargs)
            nda = pda.to_ndarray()

            self.assertIsInstance(pda, ak.Strings)
            self.assertEqual(100, len(pda))
            self.assertEqual(str, pda.dtype)
            for string in nda:
                self.assertTrue(1 <= len(string) <= 5)
                self.assertTrue(string.isupper())

        incompatible_msg = "Incompatible arguments: minlen < 0, maxlen < minlen, or size < 0"
        # (expected exception, keyword arguments, expected first exception argument)
        invalid_argument_sets = (
            (ValueError, dict(maxlen=1, minlen=5, size=100), incompatible_msg),
            (ValueError, dict(maxlen=5, minlen=1, size=-1), incompatible_msg),
            (TypeError, dict(minlen='1', maxlen=5, size=10),
             'type of argument "minlen" must be one of (int, int64); got str instead'),
            (TypeError, dict(minlen=1, maxlen='5', size=10),
             'type of argument "maxlen" must be one of (int, int64); got str instead'),
            (TypeError, dict(minlen=1, maxlen=5, size='10'),
             'type of argument "size" must be one of (int, int64); got str instead'),
        )
        for expected_error, kwargs, expected_msg in invalid_argument_sets:
            with self.assertRaises(expected_error) as cm:
                ak.random_strings_uniform(**kwargs)
            self.assertEqual(expected_msg, cm.exception.args[0])
Beispiel #15
0
        
if __name__ == '__main__':
    import sys

    # Usage: <script> [server [port]]
    # With both arguments connect to that server and port, with only a server
    # use the client's default port, and with no arguments use all defaults.
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        # argv entries are strings; the port is passed on as an integer
        ak.connect(server=cli_args[0], port=int(cli_args[1]))
    elif cli_args:
        ak.connect(server=cli_args[0])
    else:
        ak.connect()

    print("Running test from string_test.__main__")
    # with open(__file__, 'r') as f:
    #     base_words = np.array(f.read().split())
    # test_strings = np.random.choice(base_words, N, replace=True)
    # strings = ak.array(test_strings)

    base_words1 = ak.random_strings_uniform(1, 10, UNIQUE, characters='printable')
    base_words2 = ak.random_strings_lognormal(2, 0.25, UNIQUE, characters='printable')
    gremlins = ak.array(['"', ' ', ''])
    base_words = ak.concatenate((base_words1, base_words2))
    np_base_words = np.hstack((base_words1.to_ndarray(), base_words2.to_ndarray()))
    # The server-side concatenation must match the client-side one
    assert compare_strings(base_words.to_ndarray(), np_base_words)
    choices = ak.randint(0, base_words.size, N)
    strings = base_words[choices]
    test_strings = strings.to_ndarray()
    cat = ak.Categorical(strings)
    print("strings =", strings)
    print("categorical =", cat)
    print("Generation and concatenate passed")
  
    # int index
    run_test_index(strings, test_strings, cat, range(-len(gremlins), 0))
Beispiel #16
0
    def test_random_strings_uniform(self):
        """
        Check the type, size and dtype of a random_strings_uniform result,
        then check the errors raised for invalid arguments.
        """
        pda = ak.random_strings_uniform(minlen=1, maxlen=10, size=100)
        self.assertIsInstance(pda, ak.Strings)
        self.assertEqual(100, len(pda))
        self.assertEqual(str, pda.dtype)

        incompatible_msg = "Incompatible arguments: minlen < 0, maxlen < minlen, or size < 0"
        # (expected exception, keyword arguments, expected first exception argument)
        invalid_argument_sets = (
            (ValueError, dict(maxlen=1, minlen=5, size=100), incompatible_msg),
            (ValueError, dict(maxlen=5, minlen=1, size=-1), incompatible_msg),
            (TypeError, dict(minlen='1', maxlen=5, size=10),
             'type of argument "minlen" must be int; got str instead'),
            (TypeError, dict(minlen=1, maxlen='5', size=10),
             'type of argument "maxlen" must be int; got str instead'),
            (TypeError, dict(minlen=1, maxlen=5, size='10'),
             'type of argument "size" must be int; got str instead'),
        )
        for expected_error, kwargs, expected_msg in invalid_argument_sets:
            with self.assertRaises(expected_error) as cm:
                ak.random_strings_uniform(**kwargs)
            self.assertEqual(expected_msg, cm.exception.args[0])