Ejemplo n.º 1
0
    def testSaveAllLoadAllWithDict(self):
        '''
        Creates 2..n files from an input columns dict depending upon the number of
        arkouda_server locales, retrieves all datasets and corresponding pdarrays,
        and confirms they match inputs

        :return: None
        :raise: AssertionError if the input and returned datasets and pdarrays don't match
        '''
        prefix = '{}/iotest_dict'.format(IOTest.io_test_dir)
        self._create_file(columns=self.dict_columns, path_prefix=prefix)
        retrieved_columns = ak.load_all(prefix)

        self.assertEqual(3, len(retrieved_columns))
        # NOTE(review): comparing .all() of each side only checks the boolean
        # reduction of each array, not elementwise equality — a weak assertion.
        for dataset in ('int_tens_pdarray', 'int_hundreds_pdarray',
                        'float_pdarray'):
            self.assertEqual(self.dict_columns[dataset].all(),
                             retrieved_columns[dataset].all())
        self.assertEqual(
            3,
            len(ak.get_datasets('{}/iotest_dict_LOCALE0'.format(
                IOTest.io_test_dir))))
Ejemplo n.º 2
0
    def testAppendMixedStringsDataset(self):
        """
        Saves a Strings dataset, appends float and int pdarrays into the same
        files, then reloads everything and confirms all three datasets
        round-trip intact.
        """
        prefix = '{}/append-multi-type-test'.format(IOTest.io_test_dir)
        strings_array = ak.array(
            ['string {}'.format(num) for num in range(0, 25)])
        strings_array.save(prefix, dataset='m_strings')

        m_floats = ak.array([x / 10.0 for x in range(0, 10)])
        m_ints = ak.array(list(range(0, 10)))
        ak.save_all({'m_floats': m_floats, 'm_ints': m_ints},
                    prefix,
                    mode='append')

        r_mixed = ak.load_all(prefix)
        self.assertIsNotNone(r_mixed['m_floats'])
        self.assertIsNotNone(r_mixed['m_ints'])

        # Sort before comparing since on-disk ordering may differ.
        r_floats = ak.sort(ak.load(prefix, dataset='m_floats'))
        r_ints = ak.sort(ak.load(prefix, dataset='m_ints'))
        self.assertTrue((m_floats == r_floats).all())
        self.assertTrue((m_ints == r_ints).all())

        expected_strings = strings_array.to_ndarray()
        expected_strings.sort()
        actual_strings = r_mixed['m_strings'].to_ndarray()
        actual_strings.sort()
        self.assertTrue((expected_strings == actual_strings).all())
Ejemplo n.º 3
0
    def testSaveAllLoadAllWithList(self):
        '''
        Creates 2..n files from an input columns and names list depending upon the
        number of arkouda_server locales, retrieves all datasets and corresponding
        pdarrays, and confirms they match inputs

        :return: None
        :raise: AssertionError if the input and returned datasets and pdarrays don't match
        '''
        self._create_file(columns=self.list_columns,
                          prefix_path='{}/iotest_list'.format(
                              IOTest.io_test_dir),
                          names=self.names)
        retrieved_columns = ak.load_all(
            path_prefix='{}/iotest_list'.format(IOTest.io_test_dir))

        # Compare each saved column against its reloaded counterpart as sorted
        # ndarrays, since on-disk ordering may differ across locales.
        def sorted_ndarray(pdarray):
            arr = pdarray.to_ndarray()
            arr.sort()
            return arr

        self.assertEqual(4, len(retrieved_columns))
        for index, dataset in ((0, 'int_tens_pdarray'),
                               (1, 'int_hundreds_pdarray'),
                               (2, 'float_pdarray')):
            self.assertTrue((sorted_ndarray(self.list_columns[index]) ==
                             sorted_ndarray(retrieved_columns[dataset])).all())
        self.assertEqual(len(self.list_columns[3]),
                         len(retrieved_columns['bool_pdarray']))
        self.assertEqual(
            4,
            len(ak.get_datasets('{}/iotest_list_LOCALE0'.format(
                IOTest.io_test_dir))))
Ejemplo n.º 4
0
    def testSaveAndLoadCategoricalMulti(self):
        """
        Test to build a pseudo dataframe with multiple categoricals, pdarrays,
        strings objects and successfully write/read it from HDF5
        """
        c1 = self._getCategorical(prefix="c1", size=51)
        c2 = self._getCategorical(prefix="c2", size=52)
        pda1 = ak.zeros(51)
        strings1 = ak.random_strings_uniform(9, 10, 52)

        with tempfile.TemporaryDirectory(
                dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
            frame = {"cat1": c1, "cat2": c2, "pda1": pda1, "strings1": strings1}
            ak.save_all(frame, f"{tmp_dirname}/cat-save-test")
            loaded = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
            self.assertTrue(len(loaded.items()) == 4)

            def as_list(arr):
                return arr.to_ndarray().tolist()

            # assertCountEqual asserts a and b have the same elements in the
            # same amount regardless of order
            self.assertCountEqual(as_list(loaded["cat1"].categories),
                                  as_list(c1.categories))
            self.assertCountEqual(as_list(loaded["cat2"].categories),
                                  as_list(c2.categories))
            self.assertCountEqual(as_list(loaded["pda1"]), as_list(pda1))
            self.assertCountEqual(as_list(loaded["strings1"]),
                                  as_list(strings1))
Ejemplo n.º 5
0
    def testLoadAll(self):
        """
        Saves a dict of columns, reloads them with load_all, and verifies every
        expected dataset is present; also checks the error paths for a bad
        prefix (ValueError) and a nonexistent file (RuntimeError).
        """
        prefix = '{}/iotest_dict_columns'.format(IOTest.io_test_dir)
        self._create_file(columns=self.dict_columns, prefix_path=prefix)

        results = ak.load_all(path_prefix=prefix)
        for dataset in ('bool_pdarray', 'float_pdarray',
                        'int_tens_pdarray', 'int_hundreds_pdarray'):
            self.assertTrue(dataset in results)

        # Test load_all with invalid prefix
        with self.assertRaises(ValueError):
            ak.load_all(
                path_prefix='{}/iotest_dict_column'.format(IOTest.io_test_dir))

        # Test load with invalid file; the expected text must match the
        # server's message verbatim (including the "on or more" typo).
        with self.assertRaises(RuntimeError) as cm:
            ak.load_all(path_prefix='{}/not-a-file'.format(IOTest.io_test_dir))
        self.assertIn('Could not open on or more files with the file prefix',
                      cm.exception.args[0])
Ejemplo n.º 6
0
    def testSaveAndLoadCategorical(self):
        """
        Test to save categorical to hdf5 and read it back successfully

        :return: None
        :raise: AssertionError if the saved and reloaded Categorical differ
        """
        num_elems = 51  # _getCategorical starts counting at 1, so the size is really off by one
        cat = self._getCategorical(size=num_elems)
        with self.assertRaises(
                ValueError
        ):  # Expect error for mode not being append or truncate
            cat.save("foo", dataset="bar", mode="not_allowed")

        with tempfile.TemporaryDirectory(
                dir=CategoricalTest.cat_test_base_tmp) as tmp_dirname:
            dset_name = "categorical_array"  # name of categorical array

            # Test the save functionality & confirm via h5py
            cat.save(f"{tmp_dirname}/cat-save-test", dataset=dset_name)

            import h5py
            # Context manager ensures the HDF5 handle closes even if an
            # assertion fails (original leaked it on assertion failure).
            with h5py.File(tmp_dirname + "/cat-save-test_LOCALE0000",
                           mode="r") as f:
                keys = set(f.keys())
                if pdarrayIO.ARKOUDA_HDF5_FILE_METADATA_GROUP in keys:  # Ignore the metadata group if it exists
                    keys.remove(pdarrayIO.ARKOUDA_HDF5_FILE_METADATA_GROUP)
                self.assertEqual(len(keys), 4, "Expected 4 keys")
                self.assertSetEqual(
                    set(f"categorical_array.{k}"
                        for k in cat._get_components_dict().keys()), keys)

            # Now try to read them back with load_all
            x = ak.load_all(path_prefix=f"{tmp_dirname}/cat-save-test")
            self.assertTrue(dset_name in x)
            cat_from_hdf = x[dset_name]

            expected_categories = [f"string {i}" for i in range(1, num_elems)]

            # Note assertCountEqual asserts a and b have the same elements in the same amount regardless of order
            self.assertCountEqual(
                cat_from_hdf.categories.to_ndarray().tolist(),
                expected_categories)

            # Asserting the optional components and sizes are correct for both constructors should be sufficient
            self.assertTrue(cat_from_hdf.segments is not None)
            self.assertTrue(cat_from_hdf.permutation is not None)
            # (leftover debug print of cat_from_hdf.size removed)
            self.assertTrue(cat_from_hdf.size == num_elems - 1)
Ejemplo n.º 7
0
    def testSaveAllLoadAllWithList(self):
        '''
        Creates 2..n files from an input columns and names list depending upon the
        number of arkouda_server locales, retrieves all datasets and corresponding
        pdarrays, and confirms they match inputs

        :return: None
        :raise: AssertionError if the input and returned datasets and pdarrays don't match
        '''
        self._create_file(columns=self.list_columns,
                          path_prefix='/tmp/iotest_list',
                          names=self.names)
        retrieved_columns = ak.load_all(path_prefix='/tmp/iotest_list')

        self.assertEqual(3, len(retrieved_columns))
        # NOTE(review): assertEqual on .all() compares only the boolean
        # reduction of each array, not elementwise equality — a weak check.
        for index, dataset in ((0, 'int_tens_pdarray'),
                               (1, 'int_hundreds_pdarray'),
                               (2, 'float_pdarray')):
            self.assertEqual(self.list_columns[index].all(),
                             retrieved_columns[dataset].all())
        self.assertEqual(3, len(ak.get_datasets('/tmp/iotest_list_LOCALE0')))