def test_write_dset_under_root(self):
    """Write a single VirtualDataset under the file root and verify the result.

    Verifies that ``HDFwriter.write`` returns exactly one ``h5py.Dataset``,
    that every plain attribute reads back equal to the value assigned, that
    the ``labels`` attribute only lists the expected label names, and that
    the ``even_rows`` / ``odd_rows`` attributes can index the written data
    to recover the matching rows of the source array.
    """
    file_path = 'test.h5'
    self.__delete_existing_file(file_path)

    # Open explicitly in write mode. h5py >= 3.0 defaults to read-only, which
    # cannot create the file; the cleanup call above is presumably meant to
    # guarantee the file is absent at this point.
    with h5py.File(file_path, mode='w') as h5_f:
        writer = HDFwriter(h5_f)
        data = np.random.rand(5, 7)
        attrs = {
            'att_1': 'string_val',
            'att_2': 1.2345,
            'att_3': [1, 2, 3, 4],
            'att_4': ['str_1', 'str_2', 'str_3'],
            'labels': {
                'even_rows': (slice(0, None, 2), slice(None)),
                'odd_rows': (slice(1, None, 2), slice(None)),
            },
        }

        micro_dset = VirtualDataset('test', data)
        # Hand over a copy so the local dict can be edited below.
        micro_dset.attrs = attrs.copy()

        # Unpacking into a one-element list also asserts that exactly one
        # object came back from the writer.
        [h5_dset] = writer.write(micro_dset)
        self.assertIsInstance(h5_dset, h5py.Dataset)

        # 'labels' is checked separately from the plain attributes.
        reg_ref = attrs.pop('labels')

        # Expected attribute count: the plain attributes, one 'labels'
        # attribute, and one attribute per label name.
        self.assertEqual(len(h5_dset.attrs), len(attrs) + 1 + len(reg_ref))

        for key, expected_val in attrs.items():
            self.assertTrue(np.all(get_attr(h5_dset, key) == expected_val))

        label_names = list(reg_ref.keys())
        self.assertTrue(np.all([x in label_names
                                for x in get_attr(h5_dset, 'labels')]))

        # Indexing the dataset with each label attribute must give back the
        # corresponding rows of the source array.
        expected_data = [data[0::2], data[1::2]]
        written_data = [h5_dset[h5_dset.attrs['even_rows']],
                        h5_dset[h5_dset.attrs['odd_rows']]]
        for exp, act in zip(expected_data, written_data):
            self.assertTrue(np.allclose(exp, act))

    # Remove the file only after the handle has been closed.
    os.remove(file_path)
def test_write_simple_tree(self):
    """Write a two-level group/dataset tree and verify every written object.

    The tree is an outer group holding an inner group (which itself holds one
    dataset) and one dataset directly under the outer group. After writing,
    the test checks the type, parent, and attributes of each HDF5 object, and
    for each dataset that its ``even_rows`` / ``odd_rows`` attributes can
    index the written data to recover the matching rows of the source array.
    """

    def plain_attrs():
        # Build a fresh dict (with fresh lists) per call so that none of the
        # virtual objects share mutable attribute values.
        return {
            'att_1': 'string_val',
            'att_2': 1.2345,
            'att_3': [1, 2, 3, 4],
            'att_4': ['str_1', 'str_2', 'str_3'],
        }

    def dset_attrs():
        # Dataset attributes are the plain ones plus the 'labels' mapping.
        attrs = plain_attrs()
        attrs['labels'] = {
            'even_rows': (slice(0, None, 2), slice(None)),
            'odd_rows': (slice(1, None, 2), slice(None)),
        }
        return attrs

    def check_attrs(h5_obj, expected_attrs):
        # Every expected attribute must read back equal to what was assigned.
        for key, expected_val in expected_attrs.items():
            self.assertTrue(np.all(get_attr(h5_obj, key) == expected_val))

    def check_group(h5_group, expected_parent, expected_attrs):
        # Verify the parent and the attributes of a written group.
        self.assertEqual(h5_group.parent, expected_parent)
        check_attrs(h5_group, expected_attrs)

    def check_dset(h5_dset, expected_parent, data, attrs):
        # Verify the parent, attributes, and label-indexed data of a dataset.
        self.assertEqual(h5_dset.parent, expected_parent)

        reg_ref = attrs['labels']
        simple = {key: val for key, val in attrs.items() if key != 'labels'}

        # Expected attribute count: the plain attributes, one 'labels'
        # attribute, and one attribute per label name.
        self.assertEqual(len(h5_dset.attrs), len(simple) + 1 + len(reg_ref))
        check_attrs(h5_dset, simple)

        label_names = list(reg_ref.keys())
        self.assertTrue(np.all([x in label_names
                                for x in get_attr(h5_dset, 'labels')]))

        expected_data = [data[0::2], data[1::2]]
        written_data = [h5_dset[h5_dset.attrs['even_rows']],
                        h5_dset[h5_dset.attrs['odd_rows']]]
        for exp, act in zip(expected_data, written_data):
            self.assertTrue(np.allclose(exp, act))

    file_path = 'test.h5'
    self.__delete_existing_file(file_path)

    # Open explicitly in write mode. h5py >= 3.0 defaults to read-only, which
    # cannot create the file; the cleanup call above is presumably meant to
    # guarantee the file is absent at this point.
    with h5py.File(file_path, mode='w') as h5_f:
        # Inner branch: a group holding a single dataset.
        inner_dset_data = np.random.rand(5, 7)
        inner_dset_attrs = dset_attrs()
        inner_dset = VirtualDataset('inner_dset', inner_dset_data)
        inner_dset.attrs = inner_dset_attrs.copy()

        attrs_inner_grp = plain_attrs()
        inner_group = VirtualGroup('indexed_inner_group_')
        inner_group.attrs = attrs_inner_grp
        inner_group.add_children(inner_dset)

        # Outer level: a dataset that sits next to the inner group.
        outer_dset_data = np.random.rand(5, 7)
        outer_dset_attrs = dset_attrs()
        outer_dset = VirtualDataset('test', outer_dset_data,
                                    parent='/test_group')
        outer_dset.attrs = outer_dset_attrs.copy()

        attrs_outer_grp = plain_attrs()
        outer_group = VirtualGroup('unindexed_outer_group')
        outer_group.attrs = attrs_outer_grp
        outer_group.add_children([inner_group, outer_dset])

        writer = HDFwriter(h5_f)
        h5_refs_list = writer.write(outer_group)

        # I don't know of a more elegant way to do this:
        [h5_outer_dset] = get_h5_obj_refs([outer_dset.name], h5_refs_list)
        [h5_inner_dset] = get_h5_obj_refs([inner_dset.name], h5_refs_list)
        [h5_outer_group] = get_h5_obj_refs([outer_group.name], h5_refs_list)
        # NOTE(review): the inner group is looked up with a '000' suffix;
        # presumably the writer appends an index to names ending in '_'.
        [h5_inner_group] = get_h5_obj_refs(['indexed_inner_group_000'],
                                           h5_refs_list)

        self.assertIsInstance(h5_outer_dset, h5py.Dataset)
        self.assertIsInstance(h5_inner_dset, h5py.Dataset)
        self.assertIsInstance(h5_outer_group, h5py.Group)
        self.assertIsInstance(h5_inner_group, h5py.Group)

        # check assertions for the inner dataset first
        check_dset(h5_inner_dset, h5_inner_group,
                   inner_dset_data, inner_dset_attrs)

        # check assertions for the inner data group next:
        check_group(h5_inner_group, h5_outer_group, attrs_inner_grp)

        # check the outer dataset next:
        check_dset(h5_outer_dset, h5_outer_group,
                   outer_dset_data, outer_dset_attrs)

        # Finally check the outer group:
        check_group(h5_outer_group, h5_f, attrs_outer_grp)

    # Remove the file only after the handle has been closed.
    os.remove(file_path)