Beispiel #1
0
 def testHdfUnsanitizedNames(self):
     """
     Save and then read back datasets whose names contain double-quote
     characters, using a temporary directory under IOTest.io_test_dir.

     The test makes no explicit assertions; it passes as long as neither
     the save nor the read raises.
     """
     quoted_name_arrays = {
         'foo"0"': ak.arange(100),
         'bar"': ak.arange(100),
     }
     with tempfile.TemporaryDirectory(dir=IOTest.io_test_dir) as tmp_dirname:
         save_prefix = f"{tmp_dirname}/bad_dataset_names"
         # Read back using the same prefix followed by a '*' wildcard.
         read_pattern = f"{tmp_dirname}/bad_dataset_names*"
         ak.save_all(quoted_name_arrays, save_prefix)
         ak.read_all(read_pattern)
Beispiel #2
0
    def testAppendMixedStringsDataset(self):
        """
        Save a string array as dataset 'm_strings', append float and int
        datasets to the same file prefix, then load everything back and
        check that each dataset matches what was written.
        """
        # Every save/load in this test targets the same path prefix.
        file_prefix = '{}/append-multi-type-test'.format(IOTest.io_test_dir)

        source_strings = ak.array(
            ['string {}'.format(idx) for idx in range(25)])
        source_strings.save(file_prefix, dataset='m_strings')

        m_floats = ak.array([val / 10.0 for val in range(10)])
        m_ints = ak.array(list(range(10)))
        numeric_columns = {'m_floats': m_floats, 'm_ints': m_ints}
        ak.save_all(numeric_columns, file_prefix, mode='append')

        r_mixed = ak.load_all(file_prefix)
        for dataset_name in ('m_floats', 'm_ints'):
            self.assertIsNotNone(r_mixed[dataset_name])

        # Loaded arrays are sorted before the element-wise comparison.
        r_floats = ak.sort(ak.load(file_prefix, dataset='m_floats'))
        r_ints = ak.sort(ak.load(file_prefix, dataset='m_ints'))
        self.assertTrue((m_floats == r_floats).all())
        self.assertTrue((m_ints == r_ints).all())

        # Both string arrays are sorted in place before comparing.
        expected_strings = source_strings.to_ndarray()
        expected_strings.sort()
        loaded_strings = r_mixed['m_strings'].to_ndarray()
        loaded_strings.sort()

        self.assertTrue((expected_strings == loaded_strings).all())
Beispiel #3
0
    def testSaveAndLoadCategoricalMulti(self):
        """
        Test to build a pseudo dataframe with multiple categoricals,
        pdarrays, and strings objects and successfully write/read it
        from HDF5.
        """
        c1 = self._getCategorical(prefix="c1", size=51)
        c2 = self._getCategorical(prefix="c2", size=52)
        pda1 = ak.zeros(51)
        strings1 = ak.random_strings_uniform(9, 10, 52)

        with tempfile.TemporaryDirectory(
                dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
            df = {"cat1": c1, "cat2": c2, "pda1": pda1, "strings1": strings1}
            ak.save_all(df, f"{tmp_dirname}/cat-save-test")
            x = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
            # assertEqual reports the actual item count on failure, which
            # assertTrue(len(...) == 4) would hide.
            self.assertEqual(len(x.items()), 4)
            # Note assertCountEqual asserts a and b have the same elements
            # in the same amount regardless of order
            self.assertCountEqual(x["cat1"].categories.to_ndarray().tolist(),
                                  c1.categories.to_ndarray().tolist())
            self.assertCountEqual(x["cat2"].categories.to_ndarray().tolist(),
                                  c2.categories.to_ndarray().tolist())
            self.assertCountEqual(x["pda1"].to_ndarray().tolist(),
                                  pda1.to_ndarray().tolist())
            self.assertCountEqual(x["strings1"].to_ndarray().tolist(),
                                  strings1.to_ndarray().tolist())
Beispiel #4
0
 def _create_file(self, prefix_path: str,
                  columns: Union[Mapping[str, ak.array], List[ak.array]],
                  names: Union[List[str], None] = None) -> None:
     '''
     Creates an hdf5 file with dataset(s) from the specified columns and path prefix
     via the ak.save_all method. If columns is a dict, it is passed to
     ak.save_all as-is and the names argument is not used. Otherwise columns
     is treated as a list and the names list is used to create the datasets.

     :param prefix_path: path prefix passed through to ak.save_all
     :param columns: either a dict keyed by dataset name, or a list of arrays
     :param names: dataset names; required (and non-empty) when columns is
         not a dict
     :return: None
     :raise: ValueError if the names list is None or empty when columns is a list
     '''
     if isinstance(columns, dict):
         ak.save_all(columns=columns, prefix_path=prefix_path)
         return

     # A non-dict columns argument carries no dataset names of its own,
     # so the caller has to supply them.
     if not names:
         raise ValueError('the names list must be not None if columns is a list')
     ak.save_all(columns=columns, prefix_path=prefix_path, names=names)