Example #1
    def testLoad(self):
        '''
        Creates 1..n files (one per arkouda_server locale), each containing three columns,
        AKA datasets, loads each dataset, and confirms that each returned pdarray equals
        the corresponding input pdarray.

        :return: None
        :raise: AssertionError if the input and returned datasets (pdarrays) don't match
        '''
        self._create_file(columns=self.dict_columns,
                          path_prefix='{}/iotest_dict_columns'.format(
                              IOTest.io_test_dir))
        result_array_tens = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='int_tens_pdarray')
        result_array_hundreds = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='int_hundreds_pdarray')
        result_array_float = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='float_pdarray')

        self.assertEqual(self.int_tens_pdarray.all(), result_array_tens.all())
        self.assertEqual(self.int_hundreds_pdarray.all(),
                         result_array_hundreds.all())
        self.assertEqual(self.float_pdarray.all(), result_array_float.all())
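Note: a minimal standalone sketch of the same round trip outside the unit-test harness. It assumes a running arkouda_server and a writable scratch directory (the path and the stand-in arrays below are made up for illustration); ak.save_all and ak.load are used as in the examples on this page.

    import arkouda as ak

    ak.connect()  # attach to a running arkouda_server
    columns = {'int_tens_pdarray': ak.arange(1000) * 10,      # stand-ins for the
               'float_pdarray': ak.linspace(0.0, 1.0, 1000)}  # test fixtures
    # write one HDF5 file per locale, with each dict entry stored as a dataset
    ak.save_all(columns, '/tmp/ak_io_demo/iotest_dict_columns')
    # load a single dataset back by name
    tens = ak.load(path_prefix='/tmp/ak_io_demo/iotest_dict_columns',
                   dataset='int_tens_pdarray')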
Example #2
    def testAppendMixedStringsDataset(self):
        strings_array = ak.array(
            ['string {}'.format(num) for num in list(range(0, 25))])
        strings_array.save('{}/append-multi-type-test'.format(
            IOTest.io_test_dir),
                           dataset='m_strings')
        m_floats = ak.array([x / 10.0 for x in range(0, 10)])
        m_ints = ak.array(list(range(0, 10)))
        ak.save_all({
            'm_floats': m_floats,
            'm_ints': m_ints
        },
                    '{}/append-multi-type-test'.format(IOTest.io_test_dir),
                    mode='append')
        r_mixed = ak.load_all('{}/append-multi-type-test'.format(
            IOTest.io_test_dir))

        self.assertIsNotNone(r_mixed['m_floats'])
        self.assertIsNotNone(r_mixed['m_ints'])

        r_floats = ak.sort(
            ak.load('{}/append-multi-type-test'.format(IOTest.io_test_dir),
                    dataset='m_floats'))
        r_ints = ak.sort(
            ak.load('{}/append-multi-type-test'.format(IOTest.io_test_dir),
                    dataset='m_ints'))
        self.assertTrue((m_floats == r_floats).all())
        self.assertTrue((m_ints == r_ints).all())

        strings = strings_array.to_ndarray()
        strings.sort()
        r_strings = r_mixed['m_strings'].to_ndarray()
        r_strings.sort()

        self.assertTrue((strings == r_strings).all())
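Note: the append pattern above, sketched as standalone usage under the same assumptions (illustrative path, running arkouda_server). mode='append' adds datasets to the existing per-locale files instead of overwriting them, and ak.load_all returns a dict keyed by dataset name.

    import arkouda as ak

    ak.connect()
    prefix = '/tmp/ak_io_demo/append-multi-type'
    ak.array(['string {}'.format(i) for i in range(25)]).save(prefix,
                                                              dataset='m_strings')
    ak.save_all({'m_floats': ak.linspace(0.0, 0.9, 10),
                 'm_ints': ak.arange(10)},
                prefix, mode='append')   # append to the files written above
    r_mixed = ak.load_all(prefix)        # {'m_strings': ..., 'm_floats': ..., 'm_ints': ...}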
Example #3
    def testAppendStringsDataset(self):
        strings_array = ak.array(['string {}'.format(num) for num in list(range(0,25))])
        strings_array.save('{}/append-strings-test'.format(IOTest.io_test_dir), 
                           dataset='strings')
        strings_array.save('{}/append-strings-test'.format(IOTest.io_test_dir), 
                           dataset='strings-dupe', mode='append')

        r_strings = ak.load('{}/append-strings-test'.format(IOTest.io_test_dir), 
                                 dataset='strings')
        r_strings_dupe = ak.load('{}/append-strings-test'.format(IOTest.io_test_dir), 
                                 dataset='strings-dupe')  
        self.assertTrue((r_strings == r_strings_dupe).all())
Example #4
    def testSmallStringArrayToHDF5(self):
        a1 = ak.array(["ab", "cd"])
        with tempfile.TemporaryDirectory(
                dir=IOTest.io_test_dir) as tmp_dirname:
            a1.save(f"{tmp_dirname}/small_string_array", dataset="a1")
            # Now load it back in
            a2 = ak.load(f"{tmp_dirname}/small_string_array", dataset="a1")
            self.assertEqual(str(a1), str(a2))

        # Test a single string
        b1 = ak.array(["123456789"])
        with tempfile.TemporaryDirectory(
                dir=IOTest.io_test_dir) as tmp_dirname:
            b1.save(f"{tmp_dirname}/single_string", dataset="b1")
            # Now load it back in
            b2 = ak.load(f"{tmp_dirname}/single_string", dataset="b1")
            self.assertEqual(str(b1), str(b2))
Example #5
    def testSmallArrayToHDF5(self):
        a1 = ak.array([1])
        with tempfile.TemporaryDirectory(
                dir=IOTest.io_test_dir) as tmp_dirname:
            a1.save(f"{tmp_dirname}/small_numeric", dataset="a1")
            # Now load it back in
            a2 = ak.load(f"{tmp_dirname}/small_numeric", dataset="a1")
            self.assertEqual(str(a1), str(a2))
Example #6
    def testLoad(self):
        '''
        Creates 1..n files (one per arkouda_server locale), each containing multiple columns,
        AKA datasets, loads each dataset, and confirms that each returned pdarray matches
        the corresponding input pdarray.

        :return: None
        :raise: AssertionError if the input and returned datasets (pdarrays) don't match
        '''
        self._create_file(columns=self.dict_columns,
                          prefix_path='{}/iotest_dict_columns'.format(
                              IOTest.io_test_dir))
        result_array_tens = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='int_tens_pdarray')
        result_array_hundreds = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='int_hundreds_pdarray')
        result_array_floats = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='float_pdarray')
        result_array_bools = ak.load(
            path_prefix='{}/iotest_dict_columns'.format(IOTest.io_test_dir),
            dataset='bool_pdarray')

        ratens = result_array_tens.to_ndarray()
        ratens.sort()

        rahundreds = result_array_hundreds.to_ndarray()
        rahundreds.sort()

        rafloats = result_array_floats.to_ndarray()
        rafloats.sort()

        self.assertTrue((self.int_tens_ndarray == ratens).all())
        self.assertTrue((self.int_hundreds_ndarray == rahundreds).all())
        self.assertTrue((self.float_ndarray == rafloats).all())
        self.assertEqual(len(self.bool_pdarray), len(result_array_bools))

        # Test load with invalid prefix
        with self.assertRaises(RuntimeError) as cm:
            ak.load(path_prefix='{}/iotest_dict_column'.format(
                IOTest.io_test_dir),
                    dataset='int_tens_pdarray')
        self.assertIn(
            'either corresponds to files inaccessible to Arkouda or files of an invalid format',
            cm.exception.args[0].args[0])

        # Test load with invalid file
        with self.assertRaises(RuntimeError) as cm:
            ak.load(path_prefix='{}/not-a-file'.format(IOTest.io_test_dir),
                    dataset='int_tens_pdarray')
        self.assertIn('is not an HDF5 file', cm.exception.args[0].args[0])
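Note: as the two assertRaises blocks above show, ak.load raises a RuntimeError when the prefix matches no readable files or when the files are not valid HDF5. A hedged sketch of defensive usage (the path is illustrative):

    import arkouda as ak

    try:
        arr = ak.load(path_prefix='/tmp/ak_io_demo/no-such-prefix',
                      dataset='int_tens_pdarray')
    except RuntimeError as e:
        # e.g. the prefix is inaccessible to Arkouda, or the files are not HDF5
        print('load failed: {}'.format(e))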
Example #7
    def testInternalVersions(self):
        """
        Test loading internal arkouda hdf5 structuring by loading v0 and v1 files.
        v1 contains _arkouda_metadata group and attributes, v0 does not.
        Files are located under `test/resources` ... where server-side unit tests are located.
        """
        # Note: pytest unit tests are located under "tests/" vs chapel "test/"
        # The test files are located in the Chapel `test/resources` directory
        # Determine where the test was launched by inspecting our path and update it accordingly
        cwd = os.getcwd()
        if cwd.endswith(
                "tests"):  # IDEs may launch unit tests from this location
            cwd = cwd[:-1] + "/resources"
        else:  # assume arkouda root dir
            cwd += "/test/resources"

        # Now that we've figured out our loading path, load the files and test the lengths
        v0 = ak.load(cwd + "/array_v0.hdf5")
        v1 = ak.load(cwd + "/array_v1.hdf5")
        self.assertEqual(50, v0.size)
        self.assertEqual(50, v1.size)
Example #8
    def testSaveLongStringsDataset(self):
        # Create, save, and load Strings dataset
        strings = ak.array(['testing a longer string{} to be written, loaded and appended'.\
                                  format(num) for num in list(range(0,26))])
        strings.save('{}/strings-test'.format(IOTest.io_test_dir), dataset='strings')

        n_strings = strings.to_ndarray()
        n_strings.sort()
        r_strings = ak.load('{}/strings-test'.format(IOTest.io_test_dir), 
                                  dataset='strings').to_ndarray()
        r_strings.sort()

        self.assertTrue((n_strings == r_strings).all())       
Example #9
    def testUint64ToFromHDF5(self):
        """
        Test our ability to read/write uint64 to HDF5
        """
        npa1 = np.array(
            [18446744073709551500, 18446744073709551501, 18446744073709551502],
            dtype=np.uint64)
        pda1 = ak.array(npa1)
        with tempfile.TemporaryDirectory(
                dir=IOTest.io_test_dir) as tmp_dirname:
            pda1.save(f"{tmp_dirname}/small_numeric", dataset="pda1")
            # Now load it back in
            pda2 = ak.load(f"{tmp_dirname}/small_numeric", dataset="pda1")
            self.assertEqual(str(pda1), str(pda2))
            self.assertEqual(18446744073709551500, pda2[0])
            self.assertTrue((pda2.to_ndarray() == npa1).all())
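Note: a condensed sketch of the same uint64 round trip without the test scaffolding (illustrative path, running arkouda_server assumed); values near the top of the uint64 range survive the save/load unchanged.

    import numpy as np
    import arkouda as ak

    npa = np.array([18446744073709551500, 18446744073709551501], dtype=np.uint64)
    pda = ak.array(npa)                                    # uint64 pdarray
    pda.save('/tmp/ak_io_demo/uint64_demo', dataset='pda')
    back = ak.load('/tmp/ak_io_demo/uint64_demo', dataset='pda')
    assert (back.to_ndarray() == npa).all()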
Example #10
    def testStringsWithoutOffsets(self):
        """
        Tests saving and reading a Strings array without writing its offsets to HDF5.
        On load, the offsets array is instead derived from the values/bytes region by
        locating the null-byte terminators of the strings.
        """
        strings_array = ak.array(
            ['testing string{}'.format(num) for num in list(range(0, 25))])
        strings_array.save('{}/strings-test'.format(IOTest.io_test_dir),
                           dataset='strings',
                           save_offsets=False)
        r_strings_array = ak.load('{}/strings-test'.format(IOTest.io_test_dir),
                                  dataset='strings',
                                  calc_string_offsets=True)
        strings = strings_array.to_ndarray()
        strings.sort()
        r_strings = r_strings_array.to_ndarray()
        r_strings.sort()
        self.assertTrue((strings == r_strings).all())
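Note: the save_offsets / calc_string_offsets pairing used above, as a compact sketch (illustrative path). save_offsets=False omits the offsets dataset from the files; calc_string_offsets=True asks the server to rebuild the offsets from the null-terminated values on load.

    import arkouda as ak

    s = ak.array(['testing string{}'.format(i) for i in range(25)])
    s.save('/tmp/ak_io_demo/strings-no-offsets', dataset='strings',
           save_offsets=False)                  # only the values/bytes are written
    r = ak.load('/tmp/ak_io_demo/strings-no-offsets', dataset='strings',
                calc_string_offsets=True)       # offsets derived server-side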
Example #11
    def testSaveStringsDataset(self):
        # Create, save, and load Strings dataset
        strings_array = ak.array(
            ['testing string{}'.format(num) for num in list(range(0, 25))])
        strings_array.save('{}/strings-test'.format(IOTest.io_test_dir),
                           dataset='strings')
        r_strings_array = ak.load('{}/strings-test'.format(IOTest.io_test_dir),
                                  dataset='strings')

        strings = strings_array.to_ndarray()
        strings.sort()
        r_strings = r_strings_array.to_ndarray()
        r_strings.sort()
        self.assertTrue((strings == r_strings).all())

        # Read a part of a saved Strings dataset from one hdf5 file
        r_strings_subset = ak.read_all(filenames='{}/strings-test_LOCALE0000'.\
                                    format(IOTest.io_test_dir))
        self.assertIsNotNone(r_strings_subset)
        self.assertTrue(isinstance(r_strings_subset[0], str))
        self.assertIsNotNone(ak.read_hdf(filenames='{}/strings-test_LOCALE0000'.\
                            format(IOTest.io_test_dir), dsetName='strings/values'))
        self.assertIsNotNone(ak.read_hdf(filenames='{}/strings-test_LOCALE0000'.\
                            format(IOTest.io_test_dir), dsetName='strings/segments'))

        # Repeat the test using the calc_string_offsets=True option to have server calculate offsets array
        r_strings_subset = ak.read_all(
            filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
            calc_string_offsets=True)
        self.assertIsNotNone(r_strings_subset)
        self.assertTrue(isinstance(r_strings_subset[0], str))
        self.assertIsNotNone(
            ak.read_hdf(
                filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
                dsetName='strings/values',
                calc_string_offsets=True))
        self.assertIsNotNone(
            ak.read_hdf(
                filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
                dsetName='strings/segments',
                calc_string_offsets=True))
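Note: a sketch of reading pieces of a saved Strings dataset directly from one of the per-locale files, as in the assertions above (the _LOCALE0000 filename and directory are illustrative):

    import arkouda as ak

    fname = '/tmp/ak_io_demo/strings-test_LOCALE0000'
    whole = ak.read_all(filenames=fname)                              # full Strings object
    vals = ak.read_hdf(filenames=fname, dsetName='strings/values')    # raw bytes/values
    segs = ak.read_hdf(filenames=fname, dsetName='strings/segments')  # segment offsets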