def testReadHdf(self):
    """
    Write 2..n HDF5 files (two logical files, one per locale) that contain
    the same pdarrays under different dataset names, read them back with an
    explicit list of file names via read_hdf, and confirm both the success
    path and the two failure modes (missing dataset, missing file).

    :return: None
    :raise: AssertionError if the input and returned datasets don't match
    """
    # Materialize the two test files on disk.
    for prefix in ('{}/iotest_single_column'.format(IOTest.io_test_dir),
                   '{}/iotest_single_column_dupe'.format(IOTest.io_test_dir)):
        self._create_file(columns=self.dict_single_column, prefix_path=prefix)

    existing_files = ['{}/iotest_single_column_LOCALE0'.format(IOTest.io_test_dir),
                      '{}/iotest_single_column_dupe_LOCALE0'.format(IOTest.io_test_dir)]

    # Happy path: dataset present in both files.
    dataset = ak.read_hdf(dsetName='int_tens_pdarray', filenames=existing_files)
    self.assertIsNotNone(dataset)

    # Misspelled dataset name ('in_tens_pdarray') must raise with a clear message.
    with self.assertRaises(RuntimeError) as cm:
        ak.read_hdf(dsetName='in_tens_pdarray', filenames=existing_files)
    self.assertTrue('Error: The dataset in_tens_pdarray does not exist in the file'
                    in cm.exception.args[0])

    # Misspelled file names ('colum') must raise a file-not-found error.
    missing_files = ['{}/iotest_single_colum_LOCALE0'.format(IOTest.io_test_dir),
                     '{}/iotest_single_colum_dupe_LOCALE0'.format(IOTest.io_test_dir)]
    with self.assertRaises(RuntimeError) as cm:
        ak.read_hdf(dsetName='int_tens_pdarray', filenames=missing_files)
    self.assertTrue('iotest_single_colum_LOCALE0 not found' in cm.exception.args[0])
def testSaveStringsDataset(self):
    """
    Round-trip a Strings dataset through save/load, then verify that both
    component datasets (values and segments) of the saved Strings object
    can be read individually from a single HDF5 file, with and without the
    server-side calc_string_offsets option.
    """
    # Create and persist a 25-element Strings array.
    strings_array = ak.array(
        ['testing string{}'.format(num) for num in list(range(0, 25))])
    strings_array.save('{}/strings-test'.format(IOTest.io_test_dir),
                       dataset='strings')

    # Reload and compare as sorted ndarrays (on-disk order is not guaranteed).
    r_strings_array = ak.load('{}/strings-test'.format(IOTest.io_test_dir),
                              dataset='strings')
    original = strings_array.to_ndarray()
    original.sort()
    round_tripped = r_strings_array.to_ndarray()
    round_tripped.sort()
    self.assertTrue((original == round_tripped).all())

    # Read a part of the saved Strings dataset from one hdf5 file.
    r_strings_subset = ak.read_all(filenames='{}/strings-test_LOCALE0000'.\
                                   format(IOTest.io_test_dir))
    self.assertIsNotNone(r_strings_subset)
    self.assertTrue(isinstance(r_strings_subset[0], str))
    # The values and segments components are addressable as sub-datasets.
    self.assertIsNotNone(ak.read_hdf(filenames='{}/strings-test_LOCALE0000'.\
                                     format(IOTest.io_test_dir),
                                     dsetName='strings/values'))
    self.assertIsNotNone(ak.read_hdf(filenames='{}/strings-test_LOCALE0000'.\
                                     format(IOTest.io_test_dir),
                                     dsetName='strings/segments'))

    # Repeat with calc_string_offsets=True so the server derives the
    # offsets array instead of reading it from the file.
    r_strings_subset = ak.read_all(
        filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
        calc_string_offsets=True)
    self.assertIsNotNone(r_strings_subset)
    self.assertTrue(isinstance(r_strings_subset[0], str))
    self.assertIsNotNone(
        ak.read_hdf(
            filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
            dsetName='strings/values',
            calc_string_offsets=True))
    self.assertIsNotNone(
        ak.read_hdf(
            filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
            dsetName='strings/segments',
            calc_string_offsets=True))
def testReadHdfWithGlob(self):
    """
    Write 2..n HDF5 files (two logical files, one per locale) containing
    the same pdarrays under different dataset names, read them back using a
    glob pattern passed to read_hdf, and confirm the returned dataset
    matches the input pdarray.

    :return: None
    :raise: AssertionError if the input and returned datasets don't match
    """
    # Create both file sets so the glob below matches each of them.
    for prefix in ('{}/iotest_single_column'.format(IOTest.io_test_dir),
                   '{}/iotest_single_column_dupe'.format(IOTest.io_test_dir)):
        self._create_file(columns=self.dict_single_column, prefix_path=prefix)

    dataset = ak.read_hdf(dsetName='int_tens_pdarray',
                          filenames='{}/iotest_single_column*'.format(IOTest.io_test_dir))
    self.assertEqual(self.int_tens_pdarray.all(), dataset.all())
def testReadHdfFromTmp(self):
    """
    Creates 2..n files depending upon the number of arkouda_server locales
    under /tmp, each containing the same single-column pdarrays, reads them
    with an explicit list of file names passed to read_hdf, and confirms a
    dataset was returned.

    :return: None
    :raise: AssertionError if no dataset is returned

    NOTE(review): this method was previously also named ``testReadHdf``,
    which silently shadowed the earlier, more thorough test of the same
    name — only one of the two ever ran. It has been renamed so both tests
    execute. It also called ``_create_file`` with ``path_prefix=``, while
    the sibling tests use ``prefix_path=``; the keyword is now consistent
    with those call sites.
    """
    self._create_file(columns=self.dict_single_column,
                      prefix_path='/tmp/iotest_single_column')
    self._create_file(columns=self.dict_single_column,
                      prefix_path='/tmp/iotest_single_column_dupe')

    dataset = ak.read_hdf(dsetName='int_tens_pdarray',
                          filenames=[
                              '/tmp/iotest_single_column_LOCALE0',
                              '/tmp/iotest_single_column_dupe_LOCALE0'
                          ])
    self.assertIsNotNone(dataset)