def testSaveAllLoadAllWithDict(self):
    '''
    Creates 2..n files from an input columns dict depending upon the number of
    arkouda_server locales, retrieves all datasets and corresponding pdarrays,
    and confirms they match inputs

    :return: None
    :raise: AssertionError if the input and returned datasets and pdarrays don't match
    '''
    # NOTE(review): the helper keyword is prefix_path (as used by the sibling
    # tests testSaveAllLoadAllWithList / testLoadAll, matching arkouda's
    # save_all(columns, prefix_path) convention); path_prefix was a typo here.
    self._create_file(columns=self.dict_columns,
                      prefix_path='{}/iotest_dict'.format(IOTest.io_test_dir))
    retrieved_columns = ak.load_all('{}/iotest_dict'.format(IOTest.io_test_dir))

    self.assertEqual(3, len(retrieved_columns))
    # pdarray.all() reduces each array to a single bool; this checks that the
    # reductions agree between the saved and loaded columns
    self.assertEqual(self.dict_columns['int_tens_pdarray'].all(),
                     retrieved_columns['int_tens_pdarray'].all())
    self.assertEqual(self.dict_columns['int_hundreds_pdarray'].all(),
                     retrieved_columns['int_hundreds_pdarray'].all())
    self.assertEqual(self.dict_columns['float_pdarray'].all(),
                     retrieved_columns['float_pdarray'].all())
    # Each locale writes its own file; inspect locale 0's file for all 3 datasets
    self.assertEqual(3, len(ak.get_datasets(
        '{}/iotest_dict_LOCALE0'.format(IOTest.io_test_dir))))
def testAppendMixedStringsDataset(self):
    '''
    Saves a Strings dataset to HDF5, appends float and int datasets to the
    same file prefix via save_all(mode='append'), then loads everything back
    and verifies each dataset round-trips (order-insensitively).

    :return: None
    :raise: AssertionError if any loaded dataset does not match its input
    '''
    prefix = '{}/append-multi-type-test'.format(IOTest.io_test_dir)

    # Write the Strings dataset first, then append the numeric ones
    strings_array = ak.array(['string {}'.format(num) for num in range(25)])
    strings_array.save(prefix, dataset='m_strings')
    m_floats = ak.array([x / 10.0 for x in range(10)])
    m_ints = ak.array(list(range(10)))
    ak.save_all({'m_floats': m_floats, 'm_ints': m_ints}, prefix, mode='append')

    # All three datasets should come back from a single load_all
    r_mixed = ak.load_all(prefix)
    self.assertIsNotNone(r_mixed['m_floats'])
    self.assertIsNotNone(r_mixed['m_ints'])

    # Sort before comparing: dataset ordering is not guaranteed on load
    r_floats = ak.sort(ak.load(prefix, dataset='m_floats'))
    r_ints = ak.sort(ak.load(prefix, dataset='m_ints'))
    self.assertTrue((m_floats == r_floats).all())
    self.assertTrue((m_ints == r_ints).all())

    expected_strings = strings_array.to_ndarray()
    expected_strings.sort()
    actual_strings = r_mixed['m_strings'].to_ndarray()
    actual_strings.sort()
    self.assertTrue((expected_strings == actual_strings).all())
def testSaveAllLoadAllWithList(self):
    '''
    Creates 2..n files from an input columns and names list depending upon the
    number of arkouda_server locales, retrieves all datasets and corresponding
    pdarrays, and confirms they match inputs

    :return: None
    :raise: AssertionError if the input and returned datasets and pdarrays don't match
    '''
    self._create_file(columns=self.list_columns,
                      prefix_path='{}/iotest_list'.format(IOTest.io_test_dir),
                      names=self.names)
    retrieved_columns = ak.load_all(
        path_prefix='{}/iotest_list'.format(IOTest.io_test_dir))

    def as_sorted_ndarray(pda):
        # Element order is not preserved across save/load, so compare sorted copies
        nda = pda.to_ndarray()
        nda.sort()
        return nda

    self.assertEqual(4, len(retrieved_columns))
    for index, name in ((0, 'int_tens_pdarray'),
                        (1, 'int_hundreds_pdarray'),
                        (2, 'float_pdarray')):
        expected = as_sorted_ndarray(self.list_columns[index])
        actual = as_sorted_ndarray(retrieved_columns[name])
        self.assertTrue((expected == actual).all())
    # bool column: only the length is checked (sorting bools is uninformative)
    self.assertEqual(len(self.list_columns[3]),
                     len(retrieved_columns['bool_pdarray']))
    self.assertEqual(4, len(ak.get_datasets(
        '{}/iotest_list_LOCALE0'.format(IOTest.io_test_dir))))
def testSaveAndLoadCategoricalMulti(self):
    """
    Test to build a pseudo dataframe with multiple categoricals, pdarrays,
    strings objects and successfully write/read it from HDF5
    """
    cat1 = self._getCategorical(prefix="c1", size=51)
    cat2 = self._getCategorical(prefix="c2", size=52)
    zeros_pda = ak.zeros(51)
    rand_strings = ak.random_strings_uniform(9, 10, 52)

    with tempfile.TemporaryDirectory(
            dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
        frame = {
            "cat1": cat1,
            "cat2": cat2,
            "pda1": zeros_pda,
            "strings1": rand_strings,
        }
        ak.save_all(frame, f"{tmp_dirname}/cat-save-test")
        loaded = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
        self.assertTrue(len(loaded.items()) == 4)
        # assertCountEqual: same elements in the same amounts, order ignored
        self.assertCountEqual(loaded["cat1"].categories.to_ndarray().tolist(),
                              cat1.categories.to_ndarray().tolist())
        self.assertCountEqual(loaded["cat2"].categories.to_ndarray().tolist(),
                              cat2.categories.to_ndarray().tolist())
        self.assertCountEqual(loaded["pda1"].to_ndarray().tolist(),
                              zeros_pda.to_ndarray().tolist())
        self.assertCountEqual(loaded["strings1"].to_ndarray().tolist(),
                              rand_strings.to_ndarray().tolist())
def testLoadAll(self):
    '''
    Saves the dict columns to HDF5, verifies load_all returns every dataset,
    and confirms the error behavior for a bad prefix (ValueError) and a
    missing file (RuntimeError with the server's error text).

    :return: None
    :raise: AssertionError if any dataset is missing or errors don't match
    '''
    prefix = '{}/iotest_dict_columns'.format(IOTest.io_test_dir)
    self._create_file(columns=self.dict_columns, prefix_path=prefix)
    results = ak.load_all(path_prefix=prefix)
    for dataset_name in ('bool_pdarray', 'float_pdarray',
                         'int_tens_pdarray', 'int_hundreds_pdarray'):
        self.assertTrue(dataset_name in results)

    # A prefix that matches no files raises ValueError
    with self.assertRaises(ValueError):
        ak.load_all(
            path_prefix='{}/iotest_dict_column'.format(IOTest.io_test_dir))

    # A nonexistent file surfaces as a RuntimeError carrying the server's
    # message text (kept verbatim, including the server-side wording)
    with self.assertRaises(RuntimeError) as cm:
        ak.load_all(path_prefix='{}/not-a-file'.format(IOTest.io_test_dir))
    self.assertIn('Could not open on or more files with the file prefix',
                  cm.exception.args[0])
def testSaveAndLoadCategorical(self):
    """
    Test to save categorical to hdf5 and read it back successfully
    """
    num_elems = 51  # _getCategorical starts counting at 1, so the size is really off by one
    cat = self._getCategorical(size=num_elems)

    # Expect error for mode not being append or truncate
    with self.assertRaises(ValueError):
        cat.save("foo", dataset="bar", mode="not_allowed")

    with tempfile.TemporaryDirectory(
            dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
        dset_name = "categorical_array"  # name of categorical array

        # Test the save functionality & confirm via h5py
        cat.save(f"{tmp_dirname}/cat-save-test", dataset=dset_name)
        import h5py
        # Context manager ensures the file handle is closed even when an
        # assertion fails (the original leaked it on assertion failure).
        with h5py.File(tmp_dirname + "/cat-save-test_LOCALE0000", mode="r") as f:
            keys = set(f.keys())
            if pdarrayIO.ARKOUDA_HDF5_FILE_METADATA_GROUP in keys:
                # Ignore the metadata group if it exists
                keys.remove(pdarrayIO.ARKOUDA_HDF5_FILE_METADATA_GROUP)
            self.assertEqual(len(keys), 4, "Expected 4 keys")
            self.assertSetEqual(
                set(f"categorical_array.{k}"
                    for k in cat._get_components_dict().keys()),
                keys)

        # Now try to read them back with load_all
        x = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
        self.assertTrue(dset_name in x)
        cat_from_hdf = x[dset_name]

        expected_categories = [f"string {i}" for i in range(1, num_elems)]
        # Note assertCountEqual asserts a and b have the same elements
        # in the same amount regardless of order
        self.assertCountEqual(
            cat_from_hdf.categories.to_ndarray().tolist(), expected_categories)

        # Asserting the optional components and sizes are correct
        # for both constructors should be sufficient
        self.assertTrue(cat_from_hdf.segments is not None)
        self.assertTrue(cat_from_hdf.permutation is not None)
        # (leftover debug print of cat_from_hdf.size removed)
        self.assertTrue(cat_from_hdf.size == num_elems - 1)
def testSaveAllLoadAllWithList(self):
    '''
    Creates 2..n files from an input columns and names list depending upon the
    number of arkouda_server locales, retrieves all datasets and corresponding
    pdarrays, and confirms they match inputs

    :return: None
    :raise: AssertionError if the input and returned datasets and pdarrays don't match
    '''
    # NOTE(review): this method name duplicates an earlier
    # testSaveAllLoadAllWithList in this file — Python keeps only the later
    # definition, so one of the two never runs. Rename or merge; not renamed
    # here to avoid silently changing which test executes.
    # Fixed: the helper keyword is prefix_path (as in the sibling tests),
    # not path_prefix (which belongs to ak.load_all).
    self._create_file(columns=self.list_columns,
                      prefix_path='/tmp/iotest_list',
                      names=self.names)
    retrieved_columns = ak.load_all(path_prefix='/tmp/iotest_list')

    self.assertEqual(3, len(retrieved_columns))
    # pdarray.all() reduces each array to a single bool; this checks that the
    # reductions agree between the saved and loaded columns
    self.assertEqual(self.list_columns[0].all(),
                     retrieved_columns['int_tens_pdarray'].all())
    self.assertEqual(self.list_columns[1].all(),
                     retrieved_columns['int_hundreds_pdarray'].all())
    self.assertEqual(self.list_columns[2].all(),
                     retrieved_columns['float_pdarray'].all())
    # Each locale writes its own file; inspect locale 0's file for all 3 datasets
    self.assertEqual(3, len(ak.get_datasets('/tmp/iotest_list_LOCALE0')))