def test_table_info(self):
    """Check the dtype, flavor and row-count entries of ``table_info``."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    expected_dtypes = {
        't_int': 'i',
        't_float': 'f',
        't_str': 's5',
        't_obj': 'o7',
        't_bytes': 'o12',
        't_comp': 's200',
    }
    # Every column is expected to report the 'python' flavor.
    expected_flavors = dict.fromkeys(expected_dtypes, 'python')

    reported = store.table_info('/table')
    self.assertDictEqual(reported['col_dtype'], expected_dtypes)
    self.assertDictEqual(reported['col_flavor'], expected_flavors)
    self.assertEqual(reported['num_rows'], 1000)
def test_delcol(self):
    """Delete a column and compare ``table_info`` before and after.

    The table starts with two columns (``col1``, ``col2``); after
    ``delete_column('/table', 'col1')`` only ``col2`` should remain in
    both the dtype and flavor maps while ``num_rows`` stays at 3.
    """
    unique_filename = str(uuid.uuid4()) + '.h5'
    h = H5ColStore(os.path.join(self.tmp_dir, unique_filename))
    h.append_ctable('/table', {'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})

    table_info = h.table_info('/table')
    self.assertDictEqual(
        table_info, {
            'col_dtype': {
                'col1': 'i',
                'col2': 's1'
            },
            'col_flavor': {
                'col1': 'python',
                'col2': 'python'
            },
            'num_rows': 3,
            '_version': VERSION
        })

    h.delete_column('/table', 'col1')

    table_info = h.table_info('/table')
    # Both operands are dicts, so compare them as dicts: assertSequenceEqual
    # would index them by position on a mismatch and report a misleading
    # "unable to index element" failure instead of a dict diff.
    self.assertDictEqual(
        table_info, {
            'col_dtype': {
                'col2': 's1'
            },
            'col_flavor': {
                'col2': 'python'
            },
            'num_rows': 3,
            '_version': VERSION
        })
def test_table_safe_expand(self):
    """Append to an ``'o100'`` column with ``resize=True`` and check the dtype.

    After the second append the test reads the width from the node's
    dtype string (``S<digits>``) and expects ``table_info`` to report the
    column as ``'o<that width>'``.
    """
    unique_filename = str(uuid.uuid4()) + '.h5'
    h = H5ColStore(os.path.join(self.tmp_dir, unique_filename))
    dt = {'t_comp': 'o100'}
    h.create_ctable('/table', col_dtypes=dt)

    d1 = [('hello', ), ('world', 0)]
    h.append_ctable('/table', {'t_comp': d1}, resize=True)

    # read data
    b = h.read_ctable('/table')
    self.assertListEqual(b['t_comp'], d1)

    # read info
    info = h.table_info('/table')
    self.assertEqual(info['col_dtype']['t_comp'], 'o100')

    # write data bigger than the declared size: a 250-character string
    # against 'o100' (presumably a width of 100 -- confirm against the store)
    d2 = ['hello' * 50, '1']
    h.append_ctable('/table', {'t_comp': d2}, resize=True)
    b = h.read_ctable('/table')
    self.assertListEqual(b['t_comp'], d1 + d2)

    # ensure datatypes attribute was updated
    with h.open() as h5:
        node = h5.get_node('/table/t_comp')
        m = re.search(r'S(\d+)', str(node.dtype))
        # Fail with a readable message instead of an AttributeError on
        # ``None.group`` when the dtype string has no ``S<digits>`` part.
        self.assertIsNotNone(m, f'unexpected column dtype: {node.dtype}')
        new_len = int(m.group(1))

    info = h.table_info('/table')
    self.assertEqual(info['col_dtype']['t_comp'], f'o{new_len}')
def test_append_without_create_dtypes(self):
    """Append to tables that were never created via ``create_ctable``."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)

    # First append without col_dtypes: pin the dtypes the store reports.
    store.append_ctable('/table', self.col_data)
    reported = store.table_info('/table')
    expected_without_dtypes = {
        't_int': 'i',
        't_float': 'f',
        't_str': 's5',
        't_obj': 'o7',
        't_bytes': 'o12',
        't_comp': 's200',
    }
    self.assertDictEqual(reported['col_dtype'], expected_without_dtypes)

    # First append with col_dtypes supplied alongside the data.
    store.append_ctable('/table1', self.col_data, col_dtypes=self.dt)
    reported = store.table_info('/table1')
    expected_with_dtypes = {
        't_int': 'i8',
        't_float': 'n',
        't_str': 's100',
        't_obj': 'o500',
        't_bytes': 'o40',
        't_comp': 'c100',
    }
    self.assertDictEqual(reported['col_dtype'], expected_with_dtypes)

    # Stored values must match the source data element by element.
    stored = store.read_ctable('/table1')
    for name, source_values in self.col_data.items():
        for idx, value in enumerate(stored[name]):
            self.assertEqual(value, source_values[idx])
def test_select_inds(self):
    """Read rows selected by ``inds``: in range, out of range, unordered."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)

    source = {
        'c1': [f'h{i}' for i in range(100)],
        'c2': list(range(100)),
    }
    store.append_ctable('/table', source)

    everything = store.read_ctable('/table')
    self.assertDictEqual(source, everything)

    window = store.read_ctable('/table', inds=list(range(10, 34)))
    self.assertListEqual(window['c1'], source['c1'][10:34])
    self.assertListEqual(window['c2'], source['c2'][10:34])

    # read outside range
    with self.assertRaises(IndexError):
        store.read_ctable('/table', inds=list(range(90, 105)))

    # read out of order
    unordered = store.read_ctable('/table', inds=[10, 2, 78])
    self.assertDictEqual(unordered, {
        'c2': [10, 2, 78],
        'c1': ['h10', 'h2', 'h78']
    })

    # read limited columns
    single_col = store.read_ctable('/table', inds=[11, 3, 78], cols=['c2'])
    self.assertDictEqual(single_col, {'c2': [11, 3, 78]})
def test_makedir_path(self):
    """Creating a table under nested, not-yet-existing directories yields the file."""
    nested_parts = ('level1', 'level2', 'level3', 'abc.h5')
    target = os.path.join(self.tmp_dir, *nested_parts)

    store = H5ColStore(target)
    store.create_ctable('myoobj', col_dtypes={'col1': 'f'})

    self.assertTrue(os.path.exists(target))
def test_table_attrs_write(self):
    """Write, overwrite and read back a custom table attribute."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.create_ctable('/table', col_dtypes=self.dt)

    info = store.table_info('/table')
    self.assertDictEqual(info['col_dtype'], self.dt)

    # first write of the custom attribute
    with store.open() as handle:
        store._write_attrs(handle, '/table', 'hello', 'world')
    info = store.table_info('/table')
    self.assertDictEqual(info['col_dtype'], self.dt)
    self.assertEqual(info['hello'], 'world')

    # overwrite
    with store.open() as handle:
        store._write_attrs(handle, '/table', 'hello', 'yolo')
    info = store.table_info('/table')
    expected_keys = {'col_dtype', 'hello', 'num_rows', '_version'}
    self.assertSetEqual(set(info.keys()), expected_keys)
    self.assertDictEqual(info['col_dtype'], self.dt)
    self.assertEqual(info['hello'], 'yolo')

    # get by name
    self.assertEqual(store.table_info('/table')['hello'], 'yolo')
def test_repack_nonexist(self):
    """``repack`` on a store nothing has been written to must raise."""
    target = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(target)
    with self.assertRaises(Exception):
        store.repack()
def test_updates_single(self):
    """Update the single row matching ``col2 == 'e'`` in several columns."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    query = [
        ['col2', '==', 'e'],
    ]

    replacement = {'col1': [20]}
    store.update_ctable('/table', query, replacement)
    result = store.read_ctable('/table')
    self.assertListEqual(result['col1'], [0, 1, 2, 3, 20, 5, 6, 7, 8, 9])

    # Apply the same update once more, then a different value.
    store.update_ctable('/table', query, replacement)
    replacement = {'col1': [22]}
    store.update_ctable('/table', query, replacement)
    result = store.read_ctable('/table')
    self.assertListEqual(result['col1'], [0, 1, 2, 3, 22, 5, 6, 7, 8, 9])

    # Update a tuple-valued column; only index 4 should change.
    replacement = {'col3': [('sweet', 'now')]}
    expected_col3 = [('yolo', 'people')] * 10
    expected_col3[4] = ('sweet', 'now')
    store.update_ctable('/table', query, replacement)
    result = store.read_ctable('/table')
    self.assertListEqual(result['col3'], expected_col3)
def test_table_nocomp_fail(self):
    """Appending ``self.dcomp`` to an ``'o100'`` column with ``resize=False`` must raise."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.create_ctable('/table', col_dtypes={'t_comp': 'o100'})

    payload = {'t_comp': self.dcomp}
    with self.assertRaises(Exception):
        store.append_ctable('/table', payload, resize=False)
def test_path(self):
    """``_path`` joins its two arguments into a single absolute node path."""
    store = H5ColStore('abc.h5')
    # (arguments to _path, expected joined path)
    cases = [
        (('/hello', 'world'), '/hello/world'),
        (('hello', 'world'), '/hello/world'),
        (('/', 'world'), '/world'),
        (('/', '/world'), '/world'),
        (('', 'world'), '/world'),
    ]
    for args, expected in cases:
        self.assertEqual(store._path(*args), expected)
def test_delete_rows_no_match(self):
    """``delete_rows`` with the query ``col1 == 15`` is expected to return ``None``."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    query = ('col1', '==', 15)
    outcome = store.delete_rows('/table', query=query)

    self.assertIsNone(outcome)
def test_convert_obj_compress(self):
    """``_convert_data`` with dtype ``'c100'`` returns an ndarray of the pinned bytes."""
    store = H5ColStore('a.h5')
    converted = store._convert_data([['a'], ['b']], 'c100')

    self.assertIsInstance(converted, np.ndarray)

    # Exact byte strings expected for the two single-element lists.
    expected_bytes = [
        b'\x02\x01\x13\x08\x03\x00\x00\x00\x01\x00\x00\x00\x13\x00\x00\x00\x91\xa1a1',
        b'\x02\x01\x13\x08\x03\x00\x00\x00\x01\x00\x00\x00\x13\x00\x00\x00\x91\xa1b1'
    ]
    self.assertListEqual(converted.tolist(), expected_bytes)
def test_col_same_length_exception(self):
    """Appending columns of unequal length (10 vs 9 values) must raise."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)

    ten_values = list(range(10))
    nine_values = ['a', 'a', 'c', 'd', 'e', 'f', 'g', 'g', 'f']
    mismatched = {'col1': ten_values, 'col2': nine_values}

    with self.assertRaises(Exception):
        store.append_ctable('/table', mismatched)
def test_matrix_inds(self):
    """Read selected rows of a matrix-valued column via ``inds``.

    A 3x3 matrix is stored in ``col1``; reading ``inds=[1, 2]`` must
    return exactly the last two rows.
    """
    # Use a unique file name (as the other tests in this file do) so the
    # '/m1' table cannot collide with another test writing 'matrixtest.h5'
    # into the same tmp_dir.
    new_file = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    h5 = H5ColStore(new_file)
    m = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
    h5.append_ctable('/m1', col_data={'col1': m}, col_dtypes={'col1': 'f'})

    x = h5.read_ctable('/m1', inds=[1, 2])['col1']

    # Compare element-wise: summing the differences would also pass when
    # positive and negative errors cancel (e.g. two swapped values).
    self.assertTrue(np.array_equal(x, m[1:, :]))
def test_addcol(self):
    """Add columns to a table and check the failure cases and final metadata."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)

    # Adding a column before the table has been written must raise.
    with self.assertRaises(Exception):
        store.add_column('/table', 'new_col', [1] * 1000)

    store.append_ctable('/table', {'init_col': [1] * 1000})

    # A new column with 999 values does not match the 1000 stored rows.
    with self.assertRaises(Exception):
        store.add_column('/table', 'new_col', [1] * 999)

    # The failed attempts must leave the table metadata untouched.
    reported = store.table_info('/table')
    self.assertDictEqual(
        reported, {
            'col_dtype': {
                'init_col': 'i'
            },
            'col_flavor': {
                'init_col': 'python'
            },
            'num_rows': 1000,
            '_version': VERSION
        })

    for name, values in self.col_data.items():
        store.add_column('/table', name, values)

    expected_dtypes = {
        'init_col': 'i',
        't_int': 'i',
        't_float': 'f',
        't_str': 's5',
        't_obj': 'o7',
        't_bytes': 'o12',
        't_comp': 's200',
    }
    expected = {
        'col_dtype': expected_dtypes,
        # every column is expected to report the 'python' flavor
        'col_flavor': dict.fromkeys(expected_dtypes, 'python'),
        'num_rows': 1000,
        '_version': VERSION,
    }
    reported = store.table_info('/table')
    self.assertDictEqual(reported, expected)
def test_column_matrix(self):
    """A 10x3 random matrix written via ``_create_column`` keeps dtype and shape."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    matrix = np.random.rand(10, 3)

    with store.open(mode='a') as handle:
        store._create_column(handle, '/mytable/testcol', data=matrix)

    # Inspect the node while the read handle is still open.
    with store.open(mode='r') as handle:
        node = handle.get_node('/mytable/testcol')
        self.assertEqual(str(node.dtype), 'float64')
        self.assertTupleEqual(node.shape, (10, 3))
def test_matrix_initappend(self):
    """Append two matrices to the same column and read back their stack.

    A 2x4 block followed by a 3x4 block must read back as the 5x4
    vertical concatenation of the two.
    """
    # Use a unique file name (as the other tests in this file do) so the
    # '/m1' table cannot collide with another test writing 'matrixtest.h5'
    # into the same tmp_dir.
    new_file = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    h5 = H5ColStore(new_file)
    a = np.random.rand(2, 4)
    b = np.random.rand(3, 4)
    h5.append_ctable('/m1', col_data={'col1': a}, col_dtypes={'col1': 'f'})
    h5.append_ctable('/m1', col_data={'col1': b}, col_dtypes={'col1': 'f'})

    x = h5.read_ctable('/m1')['col1']
    chk_mat = np.vstack((a, b))

    # Compare element-wise: a summed difference of zero does not prove
    # equality because opposite-signed errors can cancel.
    self.assertTrue(np.array_equal(x, chk_mat))
def _run_test(self, data, typecheck, size):
    """Write ``data`` as the ``/data`` column and verify its dtype and length.

    :param data: values handed to ``_create_column``
    :param typecheck: value the stored node's ``dtype`` must equal
    :param size: expected ``len()`` of the stored node
    """
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)

    with store.open() as handle:
        store._create_column(handle, '/data', data=data)

    # Re-open and inspect the node while the handle is open.
    with store.open() as handle:
        node = handle.get_node('/data')
        self.assertEqual(node.dtype, typecheck)
        self.assertEqual(len(node), size)
def test_delete_single_item(self):
    """Deleting the only row leaves every column as an empty list."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', {'col1': ['abc'], 'col2': [1]})

    before = store.read_ctable('/table')
    self.assertDictEqual(before, {'col1': ['abc'], 'col2': [1]})

    query = [['col1', '==', 'abc']]
    store.delete_rows('/table', query=query)

    after = store.read_ctable('/table')
    self.assertDictEqual(after, {'col1': [], 'col2': []})
def test_list_return(self):
    """Data appended from ``self.col_data`` is flavored 'python' and read back as lists."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.create_ctable('/table', col_dtypes=self.dt)
    store.append_ctable('/table', self.col_data)

    info = store.table_info('/table')
    flavors = set(info['col_flavor'].values())
    self.assertSetEqual(flavors, {'python'})

    columns = store.read_ctable('/table')
    for column in columns.values():
        self.assertIsInstance(column, list)
def test_remove(self):
    """Delete a table, then delete it again with and without ``raise_exception``."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    # The info call before deletion is kept; its result is not inspected.
    store.table_info('/table')

    store.delete_ctable('/table')
    remaining = store.table_info('/table')
    self.assertEqual(len(remaining), 0)

    # check for delete exceptions
    second_delete = store.delete_ctable('/table')
    self.assertIsNone(second_delete)
    with self.assertRaises(tb.NoSuchNodeError):
        store.delete_ctable('/table', raise_exception=True)
def test_table_all_vals(self):
    """Round-trip every column of ``self.col_data`` through a created table."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.create_ctable('/table', col_dtypes=self.dt)

    # check read right after create
    empty_read = store.read_ctable('/table')
    self.assertSetEqual(set(empty_read.keys()), set(self.dt.keys()))

    store.append_ctable('/table', self.col_data)
    stored = store.read_ctable('/table')
    for name, source_values in self.col_data.items():
        for idx, value in enumerate(stored[name]):
            self.assertEqual(value, source_values[idx])
def test_ndarray_return(self):
    """Columns appended as ndarrays read back as ndarrays, except 'o'/'c' dtypes."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.create_ctable('/table', col_dtypes=self.dt)

    as_arrays = {name: np.array(values) for name, values in self.col_data.items()}
    store.append_ctable('/table', as_arrays)

    stored = store.read_ctable('/table')
    for name, column in stored.items():
        # ignore packed columns
        if re.match(r'[oc]', self.dt[name]):
            continue
        self.assertIsInstance(column, np.ndarray)
def test_updates_exception(self):
    """``update_ctable`` must raise for the two query/data pairs below."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    # Three replacement values for the rows matching col2 == 'a'.
    single_query = [
        ['col2', '==', 'a'],
    ]
    three_values = {'col1': [21, 22, 23]}
    with self.assertRaises(Exception):
        store.update_ctable('/table', single_query, three_values)

    # Two replacement values for a query combining two conditions.
    combined_query = [
        [('col2', '==', 'a'), ('col2', '==', 'g')],
    ]
    two_values = {'col1': [21, 22]}
    with self.assertRaises(Exception):
        store.update_ctable('/table', combined_query, two_values)
def test_updates_multi(self):
    """Update the two rows matching ``col2 == 'a'`` with one value, then two."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    query = [
        ['col2', '==', 'a'],
    ]

    # A single replacement value ends up in both matching rows.
    store.update_ctable('/table', query, {'col1': [20]})
    result = store.read_ctable('/table')
    self.assertListEqual(result['col1'], [20, 20, 2, 3, 4, 5, 6, 7, 8, 9])

    # Two values per column, across two columns at once.
    per_row = {'col1': [21, 22], 'col2': ['w', 'z']}
    store.update_ctable('/table', query, per_row)
    result = store.read_ctable('/table')
    self.assertListEqual(result['col1'], [21, 22, 2, 3, 4, 5, 6, 7, 8, 9])
    self.assertListEqual(
        result['col2'], ['w', 'z', 'c', 'd', 'e', 'f', 'g', 'g', 'f', 'c'])
def test_delete_rows1(self):
    """Delete rows by index, by a single query and by a combined query."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)
    store.append_ctable('/table', self.col_data)

    # Delete by explicit row numbers; the expected lists pin the row
    # order that remains afterwards.
    store.delete_rows('/table', rows=[2, 5, 6])
    remaining = store.read_ctable('/table')
    self.assertListEqual(remaining['col1'], [0, 1, 7, 3, 4, 8, 9])
    self.assertListEqual(remaining['col2'], ['a', 'a', 'g', 'd', 'e', 'f', 'c'])

    # Delete by a single-condition query.
    query = [['col2', '==', 'a']]
    store.delete_rows('/table', query=query)
    remaining = store.read_ctable('/table')
    self.assertDictEqual(remaining, {
        'col1': [8, 9, 7, 3, 4],
        'col2': ['f', 'c', 'g', 'd', 'e']
    })

    # Delete by a query combining two conditions.
    query = [[('col2', '==', 'd'), ('col2', '==', 'g')]]
    store.delete_rows('/table', query=query)
    remaining = store.read_ctable('/table')
    self.assertDictEqual(remaining, {'col1': [8, 9, 4], 'col2': ['f', 'c', 'e']})
def test_growth_rand_repack(self):
    """Append random rows one at a time, then check ``repack`` shrinks the file.

    After 200 single-row appends with ``resize=True`` the file is
    expected to exceed 100000 bytes; after ``repack()`` it is expected
    to be below 15000 bytes.
    """
    unique_filename = str(uuid.uuid4()) + '.h5'
    path_name = os.path.join(self.tmp_dir, unique_filename)
    h = H5ColStore(path_name)

    nloops = 200
    for x in range(nloops):
        # one random digit as a string (the former ''.join([...]) around
        # a single-element list was a no-op)
        news = f'{np.random.randint(10)}'
        data = {'t_comp': [np.random.rand()], 't_str': [news]}
        h.append_ctable('/table', col_data=data, resize=True)
        sz = os.stat(path_name).st_size

        # read data
        b = h.read_ctable('/table')
        self.assertEqual(len(b['t_comp']), x + 1)
        self.assertEqual(len(b['t_str']), x + 1)

    # assertGreater/assertLess report the measured size on failure,
    # unlike assertTrue on a pre-evaluated comparison.
    self.assertGreater(sz, 100000)

    h.repack()
    sz = os.stat(path_name).st_size
    self.assertLess(sz, 15000)
def test_write_wrong_data(self):
    """A failed append of wrongly typed data must leave the table unchanged."""
    store_path = os.path.join(self.tmp_dir, str(uuid.uuid4()) + '.h5')
    store = H5ColStore(store_path)

    good_rows = {
        'col1': [1, 2, 3],
        'col3': [[1, 2], ['3', '4'], [5, '6']],
        'col2': ['a', 'b', 'c'],
    }
    store.append_ctable('/table', good_rows)

    reported = store.table_info('/table')
    expected_dtypes = {'col1': 'i', 'col2': 's1', 'col3': 'o6'}
    self.assertDictEqual(reported['col_dtype'], expected_dtypes)

    # setup incorrect data type on col2
    bad_rows = {
        'col1': [4.],
        # write incorrect data to col2
        'col2': [[5]],
        'col3': [[7]],
    }
    with self.assertRaises(Exception):
        store.append_ctable('/table', bad_rows)

    # Row count and contents must still be those of the first append.
    reported = store.table_info('/table')
    stored = store.read_ctable('/table')
    self.assertEqual(reported['num_rows'], 3)
    self.assertListEqual(stored['col1'], [1, 2, 3])
    self.assertListEqual(stored['col2'], ['a', 'b', 'c'])
    self.assertListEqual(stored['col3'], [(1, 2), ('3', '4'), (5, '6')])
def test_growth_size_limit(self):
    """Repeated small appends must keep the file below 11000 bytes.

    The size bound is checked after every one of the 200 appends and
    once more after ``repack()``.
    """
    unique_filename = str(uuid.uuid4()) + '.h5'
    path_name = os.path.join(self.tmp_dir, unique_filename)
    h = H5ColStore(path_name)

    nloops = 200
    for x in range(nloops):
        data = {'t_comp': [1], 't_str': ['hello']}
        h.append_ctable('/table', col_data=data, resize=True)
        sz = os.stat(path_name).st_size
        # Original author's note: with pytables 4.3.2 the following grows
        # to 28000 with nloops=200!!
        # assertLess reports the measured size on failure, unlike
        # assertTrue on a pre-evaluated comparison.
        self.assertLess(sz, 11000)

        # read data
        b = h.read_ctable('/table')
        self.assertListEqual(b['t_comp'], [1] * (x + 1))
        self.assertListEqual(b['t_str'], ['hello'] * (x + 1))

    h.repack()
    sz = os.stat(path_name).st_size
    self.assertLess(sz, 11000)