Code example #1
    def testReadAllWithErrorAndWarn(self):
        self._create_file(
            columns=self.dict_single_column,
            prefix_path=f'{IOTest.io_test_dir}/iotest_single_column')
        self._create_file(
            columns=self.dict_single_column,
            prefix_path=f'{IOTest.io_test_dir}/iotest_single_column_dupe')

        # Make sure we can read ok
        dataset = ak.read_all(filenames=[
            f'{IOTest.io_test_dir}/iotest_single_column_LOCALE0000',
            f'{IOTest.io_test_dir}/iotest_single_column_dupe_LOCALE0000'
        ])
        self.assertIsNotNone(dataset, "Expected dataset to be populated")

        # Change the name of the first file to force a RuntimeError due to the missing file.
        with self.assertRaises(RuntimeError):
            dataset = ak.read_all(filenames=[
                f'{IOTest.io_test_dir}/iotest_MISSING_single_column_LOCALE0000',
                f'{IOTest.io_test_dir}/iotest_single_column_dupe_LOCALE0000'
            ])

        # Repeat with the missing file, but pass allow_errors=True so read_all warns instead of raising
        with pytest.warns(
                RuntimeWarning,
                match=r"There were .* errors reading files on the server.*"):
            dataset = ak.read_all(filenames=[
                f'{IOTest.io_test_dir}/iotest_MISSING_single_column_LOCALE0000',
                f'{IOTest.io_test_dir}/iotest_single_column_dupe_LOCALE0000'
            ],
                                  strictTypes=False,
                                  allow_errors=True)
        self.assertIsNotNone(dataset, "Expected dataset to be populated")
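
The _create_file helper invoked throughout these examples is not shown on this page. A minimal sketch of what it presumably does, assuming it simply persists a dict of pdarrays with ak.save_all (the helper's actual signature and behavior in the source projects are an assumption):

    def _create_file(self, columns, prefix_path):
        # Hypothetical reconstruction: write each name -> pdarray entry to
        # HDF5, one file per locale; arkouda appends _LOCALE<n> suffixes,
        # which is why the tests read *_LOCALE0000 files.
        ak.save_all(columns, prefix_path)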
Code example #2
    def testHdfUnsanitizedNames(self):
        # Test when quotes are part of the dataset name
        my_arrays = {'foo"0"': ak.arange(100), 'bar"': ak.arange(100)}
        with tempfile.TemporaryDirectory(
                dir=IOTest.io_test_dir) as tmp_dirname:
            ak.save_all(my_arrays, f"{tmp_dirname}/bad_dataset_names")
            ak.read_all(f"{tmp_dirname}/bad_dataset_names*")
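
A possible follow-up check inside the with block above (an assumption, not part of the original test, which only verifies that save and read do not raise):

            # Hypothetical assertion: two datasets were written, so read_all
            # should return a dict with two entries. Whether the quoted names
            # survive the round trip unsanitized is an assumption about the
            # server's name handling.
            data = ak.read_all(f"{tmp_dirname}/bad_dataset_names*")
            assert len(data) == 2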
Code example #3
File: io_test.py  Project: mcdobe100/arkouda
    def testReadAllWithGlob(self):
        '''
        Creates 2..n files, depending upon the number of arkouda_server locales, in two
        file sets whose files contain identically named datasets backed by the same pdarrays,
        reads the files with the glob feature of the read_all method, and confirms the
        datasets and embedded pdarrays match the input datasets and pdarrays.

        :return: None
        :raise: AssertionError if the input and returned datasets don't match
        '''
        self._create_file(columns=self.dict_columns,
                          path_prefix='{}/iotest_dict_columns'.format(
                              IOTest.io_test_dir))
        self._create_file(columns=self.dict_columns,
                          path_prefix='{}/iotest_dict_columns_dupe'.format(
                              IOTest.io_test_dir))

        dataset = ak.read_all(
            filenames='{}/iotest_dict_columns*'.format(IOTest.io_test_dir))

        self.assertEqual(3, len(list(dataset.keys())))
        self.assertEqual(self.int_tens_pdarray.all(),
                         dataset['int_tens_pdarray'].all())
        self.assertEqual(self.int_hundreds_pdarray.all(),
                         dataset['int_hundreds_pdarray'].all())
        self.assertEqual(self.float_pdarray.all(),
                         dataset['float_pdarray'].all())
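
Since the glob matches both file sets and they share the same three dataset names, read_all is expected to concatenate rows from all matched files under each name, which is why only three keys come back. A hypothetical length check illustrating this (an assumption about read_all's concatenation behavior, not part of the original test):

        # If rows from every matched file are appended per dataset, each
        # returned pdarray is twice the length of its input.
        assert len(dataset['int_tens_pdarray']) == 2 * len(self.int_tens_pdarray)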
Code example #4
    def testReadAllWithGlob(self):
        '''
        Creates 2..n files, depending upon the number of arkouda_server locales, in two
        file sets whose files contain identically named datasets backed by the same pdarrays,
        reads the files with the glob feature of the read_all method, and confirms the
        datasets and embedded pdarrays match the input datasets and pdarrays.

        :return: None
        :raise: AssertionError if the input and returned datasets don't match
        '''
        self._create_file(columns=self.dict_columns, 
                          prefix_path='{}/iotest_dict_columns'.format(IOTest.io_test_dir))
         
        retrieved_columns = ak.read_all(filenames='{}/iotest_dict_columns*'.format(IOTest.io_test_dir))

        itp = self.list_columns[0].to_ndarray()
        itp.sort()
        ritp = retrieved_columns['int_tens_pdarray'].to_ndarray()
        ritp.sort()
        ihp = self.list_columns[1].to_ndarray()
        ihp.sort()
        rihp = retrieved_columns['int_hundreds_pdarray'].to_ndarray()
        rihp.sort()
        fp = self.list_columns[2].to_ndarray()
        fp.sort()
        rfp = retrieved_columns['float_pdarray'].to_ndarray()
        rfp.sort()

        self.assertEqual(4, len(list(retrieved_columns.keys())))  
        self.assertTrue((itp == ritp).all())
        self.assertTrue((ihp == rihp).all())
        self.assertTrue((fp == rfp).all())
        self.assertEqual(len(self.bool_pdarray), len(retrieved_columns['bool_pdarray']))
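
The sort-before-compare pattern above guards against row ordering differing when data is read back from multiple locale files. A hypothetical helper condensing the pattern (the name and placement are illustrative, not from the source project):

        def sorted_equal(expected, retrieved):
            # Compare two pdarrays irrespective of ordering by sorting
            # local ndarray copies of each.
            e = expected.to_ndarray()
            e.sort()
            r = retrieved.to_ndarray()
            r.sort()
            return (e == r).all()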
Code example #5
    def testStrictTypes(self):
        N = 100
        prefix = '{}/strict-type-test'.format(IOTest.io_test_dir)
        inttypes = [np.uint32, np.int64, np.uint16, np.int16]
        floattypes = [np.float32, np.float64, np.float32, np.float64]
        for i, (it, ft) in enumerate(zip(inttypes, floattypes)):
            with h5py.File('{}-{}'.format(prefix, i), 'w') as f:
                idata = np.arange(i*N, (i+1)*N, dtype=it)
                f.create_dataset('integers', data=idata)
                fdata = np.arange(i*N, (i+1)*N, dtype=ft)
                f.create_dataset('floats', data=fdata)
        with self.assertRaises(RuntimeError) as cm:
            a = ak.read_all(prefix+'*')
        self.assertTrue('Inconsistent precision or sign' in cm.exception.args[0])
        a = ak.read_all(prefix+'*', strictTypes=False)
        self.assertTrue((a['integers'] == ak.arange(len(inttypes)*N)).all())
        self.assertTrue(np.allclose(a['floats'].to_ndarray(),
                                    np.arange(len(floattypes)*N, dtype=np.float64)))
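
The assertions after strictTypes=False imply that the mixed per-file dtypes are unified on read. A short sketch of the promotion those assertions depend on, using NumPy's rules (that arkouda applies exactly NumPy-style promotion here is an assumption):

    import numpy as np

    inttypes = [np.uint32, np.int64, np.uint16, np.int16]
    floattypes = [np.float32, np.float64]
    print(np.result_type(*inttypes))    # int64, matching the ak.arange comparison
    print(np.result_type(*floattypes))  # float64, matching the np.allclose check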
Code example #6
File: read_all_tests.py  Project: zhihuidu/arkouda
    def testReadAll(self):
        ak.verbose = False  # client verbose flag
        cwd = os.getcwd()
        allfiles = glob(cwd + '/../converter/netflow_day-*.hdf')
        print(allfiles)
        start = time.time()
        dictionary1 = ak.read_all(allfiles, iterative=True)
        end = time.time()
        t1 = end - start
        print("read_all(iterative=True) seconds: %.3f" % (t1))
        for key, value in dictionary1.items():
            print(key, type(value), value, len(value))

        start = time.time()
        dictionary2 = ak.read_all(allfiles)
        end = time.time()
        t2 = end - start
        print("read_all() seconds: %.3f" % (t2))
        for key, value in dictionary2.items():
            print(key, type(value), value, len(value))
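
A small convenience wrapper for the timing pattern repeated above (illustrative only; the name is not from the source project):

    import time

    def timed_read_all(files, **kwargs):
        # Time a read_all call and report elapsed seconds alongside the
        # keyword arguments used.
        start = time.time()
        data = ak.read_all(files, **kwargs)
        print("read_all(%s) seconds: %.3f" % (kwargs, time.time() - start))
        return data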
Code example #7
    def testSaveStringsDataset(self):
        # Create, save, and load Strings dataset
        strings_array = ak.array(
            ['testing string{}'.format(num) for num in list(range(0, 25))])
        strings_array.save('{}/strings-test'.format(IOTest.io_test_dir),
                           dataset='strings')
        r_strings_array = ak.load('{}/strings-test'.format(IOTest.io_test_dir),
                                  dataset='strings')

        strings = strings_array.to_ndarray()
        strings.sort()
        r_strings = r_strings_array.to_ndarray()
        r_strings.sort()
        self.assertTrue((strings == r_strings).all())

        # Read a part of a saved Strings dataset from one hdf5 file
        r_strings_subset = ak.read_all(
            filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000')
        self.assertIsNotNone(r_strings_subset)
        self.assertTrue(isinstance(r_strings_subset[0], str))
        self.assertIsNotNone(
            ak.read_hdf(filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
                        dsetName='strings/values'))
        self.assertIsNotNone(
            ak.read_hdf(filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
                        dsetName='strings/segments'))

        # Repeat the test with calc_string_offsets=True so the server calculates the offsets array
        r_strings_subset = ak.read_all(
            filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
            calc_string_offsets=True)
        self.assertIsNotNone(r_strings_subset)
        self.assertTrue(isinstance(r_strings_subset[0], str))
        self.assertIsNotNone(
            ak.read_hdf(
                filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
                dsetName='strings/values',
                calc_string_offsets=True))
        self.assertIsNotNone(
            ak.read_hdf(
                filenames=f'{IOTest.io_test_dir}/strings-test_LOCALE0000',
                dsetName='strings/segments',
                calc_string_offsets=True))
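
The dsetName values 'strings/values' and 'strings/segments' reflect arkouda's segmented-string storage: a flat byte array holding all string data plus an array of start offsets. A tiny illustration of that layout (the exact on-disk encoding, including null termination, is an assumption):

    import numpy as np

    # For the strings ["ab", "c"]:
    values = np.frombuffer(b"ab\x00c\x00", dtype=np.uint8)  # all bytes, null-terminated
    segments = np.array([0, 3])  # start offset of each string within values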
Code example #8
    def testReadAll(self):
        '''
        Creates 2..n files depending upon the number of arkouda_server locales, reads the files
        by passing an explicit list of file names to the read_all method, and confirms the
        datasets and embedded pdarrays match the input dataset and pdarrays.

        :return: None
        :raise: AssertionError if the input and returned datasets don't match
        '''
        self._create_file(columns=self.dict_columns, 
                          prefix_path='{}/iotest_dict_columns'.format(IOTest.io_test_dir))
        
        dataset = ak.read_all(filenames=['{}/iotest_dict_columns_LOCALE0'.format(IOTest.io_test_dir)])
        self.assertEqual(4, len(list(dataset.keys())))     
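
To cover every locale with an explicit list rather than the single _LOCALE0 file above, the per-locale files could be gathered first; a minimal sketch (assuming the same _LOCALE<n> naming used elsewhere on this page):

        from glob import glob

        # Collect all per-locale files for the prefix and pass them explicitly.
        filenames = sorted(glob('{}/iotest_dict_columns_LOCALE*'.format(IOTest.io_test_dir)))
        dataset = ak.read_all(filenames=filenames)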
Code example #9
File: read_all_tests.py  Project: zhihuidu/arkouda

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("Usage: {} <hostname> <port> [<HDF5_filenames>]".format(
            sys.argv[0]))
        sys.exit()
    ak.connect(sys.argv[1], sys.argv[2])
    ak.verbose = False  # client verbose flag
    cwd = os.getcwd()
    allfiles = glob(cwd + '/../converter/netflow_day-*.hdf')
    if len(sys.argv) > 3:
        allfiles = sys.argv[3:]

    start = time.time()
    dictionary1 = ak.read_all(allfiles, iterative=True)
    end = time.time()
    t1 = end - start
    print("read_all(iterative=True) seconds: %.3f" % (t1))
    for key, value in dictionary1.items():
        print(key, type(value), value, len(value))

    start = time.time()
    dictionary2 = ak.read_all(allfiles)
    end = time.time()
    t2 = end - start
    print("read_all() seconds: %.3f" % (t2))
    for key, value in dictionary2.items():
        print(key, type(value), value, len(value))

    ak.disconnect()
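
An example invocation of the script above (hostname, port, and file path are illustrative):

    python read_all_tests.py localhost 5555 /path/to/netflow_day-01.hdf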