def populateFrom(self, importedFile: h5py.File, topGroupKeys: List[str]):
    """Copy the requested top-level groups from *importedFile* into this file.

    The BASE_KEYS (e.g. ilastikVersion, workflowName) are always copied in
    addition to *topGroupKeys*, because they can influence the way in which
    the deserializers interpret the imported data.
    """
    keys_to_copy = topGroupKeys + self.BASE_KEYS
    available = set(importedFile.keys())
    for key in keys_to_copy:
        if key not in available:
            continue
        # Drop any existing value first, then pull the group across wholesale.
        self.clearValue(key)
        importedFile.copy(key, self.file["/"])
def copy_file(self, **kwargs):
    """
    Convenience function to copy an HDF5 file while allowing external links
    to be resolved.

    NOTE: The source file will be opened in 'r' mode and the destination file
    will be opened in 'w' mode using h5py. To avoid possible collisions, care
    should be taken that, e.g., the source file is not opened already when
    calling this function.
    """
    source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs(
        'source_filename', 'dest_filename', 'expand_external', 'expand_refs', 'expand_soft', kwargs)
    # Context managers guarantee both handles are closed even if a copy
    # raises; the previous version leaked both files on any error.
    with File(source_filename, 'r') as source_file, File(dest_filename, 'w') as dest_file:
        # Copy every top-level object, honouring the requested
        # link-expansion flags.
        for objname in source_file["/"].keys():
            source_file.copy(
                source=objname,
                dest=dest_file,
                name=objname,
                expand_external=expand_external,
                expand_refs=expand_refs,
                expand_soft=expand_soft,
                shallow=False,
                without_attrs=False,
            )
        # Root-group attributes are not carried over by copy(); transfer
        # them explicitly.
        for objname in source_file['/'].attrs:
            dest_file['/'].attrs[objname] = source_file['/'].attrs[objname]
def update_metadata_counts(infile: h5py.File, outfile: h5py.File, mask: SWIFTMask):
    """
    Recalculates the cell particle counts and offsets based on the particles
    present in the subset

    Parameters
    ----------
    infile : h5py.File
        File handle for input snapshot
    outfile : h5py.File
        File handle for output subset of snapshot
    mask : SWIFTMask
        the mask being used to define subset
    """
    # Newer snapshots store offsets under "OffsetsInFile"; fall back to the
    # legacy "Offsets" group name otherwise.
    if "Cells/OffsetsInFile" in infile:
        offsets_path = "Cells/OffsetsInFile"
    else:
        offsets_path = "Cells/Offsets"

    outfile.create_group("Cells")
    outfile.create_group("Cells/Counts")
    outfile.create_group(offsets_path)

    # Per-cell particle counts and offsets recomputed for the masked subset.
    particle_counts, particle_offsets = mask.get_masked_counts_offsets()

    def _write_per_type(per_type_values, dataset_paths):
        # Write each particle type's values to the dataset path that
        # mentions its SWIFT name.
        for part_type in per_type_values:
            swift_name = get_swift_name(part_type)
            for dset in dataset_paths:
                if swift_name in dset:
                    outfile[dset] = per_type_values[part_type]

    _write_per_type(particle_counts, find_datasets(infile, path="/Cells/Counts"))
    _write_per_type(particle_offsets, find_datasets(infile, path=offsets_path))

    # Copy the cell centres and metadata
    infile.copy("/Cells/Centres", outfile, name="/Cells/Centres")
    infile.copy("/Cells/Meta-data", outfile, name="/Cells/Meta-data")
def write_metadata(infile: h5py.File, outfile: h5py.File, links_list: List[str], mask: SWIFTMask):
    """
    Copy over all the metadata from snapshot to output file

    Parameters
    ----------
    infile : h5py.File
        hdf5 file handle for input snapshot
    outfile : h5py.File
        hdf5 file handle for output snapshot
    links_list : list of str
        names of links found in the snapshot
    mask : SWIFTMask
        the mask being used to define subset
    """
    update_metadata_counts(infile, outfile, mask)

    # "Cells" is rebuilt by update_metadata_counts and particle groups are
    # masked separately, so skip those along with any linked objects.
    skip_list = links_list + ["PartType", "Cells"]

    for field in infile.keys():
        # Generator short-circuits on the first match; the old code built a
        # full list of matching substrings before calling any().
        if not any(substr in field for substr in skip_list):
            infile.copy(field, outfile)
class TestCopy(TestCase):
    """Exercise Group.copy across the source/destination forms h5py accepts.

    Two scratch files are used: most tests copy within ``f1`` or from ``f1``
    into ``f2``.
    """

    def setUp(self):
        self.f1 = File(self.mktemp(), 'w')
        self.f2 = File(self.mktemp(), 'w')

    def tearDown(self):
        # f1 may have been closed and set to None by a test (external links).
        if self.f1:
            self.f1.close()
        if self.f2:
            self.f2.close()

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_path_to_path(self):
        # Source and destination both given as path strings.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1, 2, 3]

        self.f1.copy('foo', 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_path_to_group(self):
        # Destination given as a Group object; source lands *inside* it.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1, 2, 3]
        baz = self.f1.create_group('baz')

        self.f1.copy('foo', baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1, 2, 3]))

        # Destination group may live in a different file.
        self.f1.copy('foo', self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_group_to_path(self):
        # Source given as a Group object, destination as a path string.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1, 2, 3]

        self.f1.copy(foo, 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1, 2, 3]))

        # Copying a group from another file by object reference.
        self.f2.copy(foo, 'foo')
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_group_to_group(self):
        # Both source and destination given as Group objects.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1, 2, 3]
        baz = self.f1.create_group('baz')

        self.f1.copy(foo, baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1, 2, 3]))

        # Cross-file variant: copy into the root group of the second file.
        self.f1.copy(foo, self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'],
                              np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_dataset(self):
        # Datasets can be copied by object or by path, within and across files.
        self.f1['foo'] = [1, 2, 3]
        foo = self.f1['foo']
        grp = self.f1.create_group("grp")

        self.f1.copy(foo, 'bar')
        self.assertArrayEqual(self.f1['bar'], np.array([1, 2, 3]))

        self.f1.copy('foo', 'baz')
        self.assertArrayEqual(self.f1['baz'], np.array([1, 2, 3]))

        # Copying into a group keeps the source's base name ('foo').
        self.f1.copy(foo, grp)
        self.assertArrayEqual(self.f1['/grp/foo'], np.array([1, 2, 3]))

        self.f1.copy('foo', self.f2)
        self.assertArrayEqual(self.f2['foo'], np.array([1, 2, 3]))

        self.f2.copy(self.f1['foo'], self.f2, 'bar')
        self.assertArrayEqual(self.f2['bar'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_shallow(self):
        # shallow=True copies immediate members but leaves subgroups empty.
        foo = self.f1.create_group('foo')
        bar = foo.create_group('bar')
        foo['qux'] = [1, 2, 3]
        bar['quux'] = [4, 5, 6]

        self.f1.copy(foo, 'baz', shallow=True)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertIsInstance(baz['bar'], Group)
        self.assertEqual(len(baz['bar']), 0)
        self.assertArrayEqual(baz['qux'], np.array([1, 2, 3]))

        self.f2.copy(foo, 'foo', shallow=True)
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertIsInstance(self.f2['foo/bar'], Group)
        self.assertEqual(len(self.f2['foo/bar']), 0)
        self.assertArrayEqual(self.f2['foo/qux'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_without_attributes(self):
        # without_attrs=True drops attributes from the copied object.
        self.f1['foo'] = [1, 2, 3]
        foo = self.f1['foo']
        foo.attrs['bar'] = [4, 5, 6]

        self.f1.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1, 2, 3]))
        assert 'bar' not in self.f1['baz'].attrs

        self.f2.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1, 2, 3]))
        assert 'bar' not in self.f2['baz'].attrs

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_soft_links(self):
        # expand_soft=True materialises soft-link targets in the copy, so the
        # copies survive deletion of the original link target.
        self.f1['bar'] = [1, 2, 3]
        foo = self.f1.create_group('foo')
        foo['baz'] = SoftLink('/bar')

        self.f1.copy(foo, 'qux', expand_soft=True)
        self.f2.copy(foo, 'foo', expand_soft=True)
        del self.f1['bar']

        self.assertIsInstance(self.f1['qux'], Group)
        self.assertArrayEqual(self.f1['qux/baz'], np.array([1, 2, 3]))

        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/baz'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_external_links(self):
        # expand_external=True resolves external links into real data; the
        # copy must remain readable after the linked file is deleted.
        filename = self.f1.filename
        self.f1['foo'] = [1, 2, 3]

        self.f2['bar'] = ExternalLink(filename, 'foo')
        self.f1.close()
        self.f1 = None

        self.assertArrayEqual(self.f2['bar'], np.array([1, 2, 3]))

        self.f2.copy('bar', 'baz', expand_external=True)
        os.unlink(filename)
        self.assertArrayEqual(self.f2['baz'], np.array([1, 2, 3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1, 8, 9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_refs(self):
        # expand_refs=True rewrites object references to point at the copies.
        self.f1['foo'] = [1, 2, 3]
        self.f1['bar'] = [4, 5, 6]
        foo = self.f1['foo']
        bar = self.f1['bar']
        foo.attrs['bar'] = bar.ref

        self.f1.copy(foo, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1, 2, 3]))
        baz_bar = self.f1['baz'].attrs['bar']
        self.assertArrayEqual(self.f1[baz_bar], np.array([4, 5, 6]))
        # The reference points to a copy of bar, not to bar itself.
        self.assertNotEqual(self.f1[baz_bar].name, bar.name)

        self.f1.copy('foo', self.f2, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1, 2, 3]))
        baz_bar = self.f2['baz'].attrs['bar']
        self.assertArrayEqual(self.f2[baz_bar], np.array([4, 5, 6]))

        self.f1.copy('/', self.f2, 'root', expand_refs=True)
        self.assertArrayEqual(self.f2['root/foo'], np.array([1, 2, 3]))
        self.assertArrayEqual(self.f2['root/bar'], np.array([4, 5, 6]))
        foo_bar = self.f2['root/foo'].attrs['bar']
        self.assertArrayEqual(self.f2[foo_bar], np.array([4, 5, 6]))
        # There's only one copy of bar, which the reference points to.
        self.assertEqual(self.f2[foo_bar], self.f2['root/bar'])
class TestCopy(TestCase):
    """Exercise Group.copy across the source/destination forms h5py accepts.

    Two scratch files are used: most tests copy within ``f1`` or from ``f1``
    into ``f2``.
    """

    def setUp(self):
        self.f1 = File(self.mktemp(), 'w')
        self.f2 = File(self.mktemp(), 'w')

    def tearDown(self):
        # f1 may have been closed and set to None by a test (external links).
        if self.f1:
            self.f1.close()
        if self.f2:
            self.f2.close()

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_path_to_path(self):
        # Source and destination both given as path strings.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]

        self.f1.copy('foo', 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_path_to_group(self):
        # Destination given as a Group object; source lands *inside* it.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]
        baz = self.f1.create_group('baz')

        self.f1.copy('foo', baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1,2,3]))

        # Destination group may live in a different file.
        self.f1.copy('foo', self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_group_to_path(self):
        # Source given as a Group object, destination as a path string.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]

        self.f1.copy(foo, 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1,2,3]))

        # Copying a group from another file by object reference.
        self.f2.copy(foo, 'foo')
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_group_to_group(self):
        # Both source and destination given as Group objects.
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]
        baz = self.f1.create_group('baz')

        self.f1.copy(foo, baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1,2,3]))

        # Cross-file variant: copy into the root group of the second file.
        self.f1.copy(foo, self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_dataset(self):
        # Datasets can be copied by object or by path, within and across files.
        self.f1['foo'] = [1,2,3]
        foo = self.f1['foo']

        self.f1.copy(foo, 'bar')
        self.assertArrayEqual(self.f1['bar'], np.array([1,2,3]))

        self.f1.copy('foo', 'baz')
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))

        self.f1.copy('foo', self.f2)
        self.assertArrayEqual(self.f2['foo'], np.array([1,2,3]))

        self.f2.copy(self.f1['foo'], self.f2, 'bar')
        self.assertArrayEqual(self.f2['bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_shallow(self):
        # shallow=True copies immediate members but leaves subgroups empty.
        foo = self.f1.create_group('foo')
        bar = foo.create_group('bar')
        foo['qux'] = [1,2,3]
        bar['quux'] = [4,5,6]

        self.f1.copy(foo, 'baz', shallow=True)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertIsInstance(baz['bar'], Group)
        self.assertEqual(len(baz['bar']), 0)
        self.assertArrayEqual(baz['qux'], np.array([1,2,3]))

        self.f2.copy(foo, 'foo', shallow=True)
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertIsInstance(self.f2['foo/bar'], Group)
        self.assertEqual(len(self.f2['foo/bar']), 0)
        self.assertArrayEqual(self.f2['foo/qux'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_without_attributes(self):
        # without_attrs=True drops attributes from the copied object.
        self.f1['foo'] = [1,2,3]
        foo = self.f1['foo']
        foo.attrs['bar'] = [4,5,6]

        self.f1.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))
        # assertNotIn replaces the deprecated assert_() alias, which was
        # removed from unittest in Python 3.12.
        self.assertNotIn('bar', self.f1['baz'].attrs)

        self.f2.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))
        self.assertNotIn('bar', self.f2['baz'].attrs)

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_soft_links(self):
        # expand_soft=True materialises soft-link targets in the copy, so the
        # copies survive deletion of the original link target.
        self.f1['bar'] = [1,2,3]
        foo = self.f1.create_group('foo')
        foo['baz'] = SoftLink('/bar')

        self.f1.copy(foo, 'qux', expand_soft=True)
        self.f2.copy(foo, 'foo', expand_soft=True)
        del self.f1['bar']

        self.assertIsInstance(self.f1['qux'], Group)
        self.assertArrayEqual(self.f1['qux/baz'], np.array([1,2,3]))

        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/baz'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_external_links(self):
        # expand_external=True resolves external links into real data; the
        # copy must remain readable after the linked file is deleted.
        filename = self.f1.filename
        self.f1['foo'] = [1,2,3]

        self.f2['bar'] = ExternalLink(filename, 'foo')
        self.f1.close()
        self.f1 = None

        self.assertArrayEqual(self.f2['bar'], np.array([1,2,3]))

        self.f2.copy('bar', 'baz', expand_external=True)
        os.unlink(filename)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_refs(self):
        # expand_refs=True rewrites object references to point at the copies.
        self.f1['foo'] = [1,2,3]
        self.f1['bar'] = [4,5,6]
        foo = self.f1['foo']
        bar = self.f1['bar']
        foo.attrs['bar'] = bar.ref

        self.f1.copy(foo, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))
        baz_bar = self.f1['baz'].attrs['bar']
        self.assertArrayEqual(self.f1[baz_bar], np.array([4,5,6]))
        # The reference points to a copy of bar, not to bar itself.
        self.assertNotEqual(self.f1[baz_bar].name, bar.name)

        self.f1.copy('foo', self.f2, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))
        baz_bar = self.f2['baz'].attrs['bar']
        self.assertArrayEqual(self.f2[baz_bar], np.array([4,5,6]))

        self.f1.copy('/', self.f2, 'root', expand_refs=True)
        self.assertArrayEqual(self.f2['root/foo'], np.array([1,2,3]))
        self.assertArrayEqual(self.f2['root/bar'], np.array([4,5,6]))
        foo_bar = self.f2['root/foo'].attrs['bar']
        self.assertArrayEqual(self.f2[foo_bar], np.array([4,5,6]))
        # There's only one copy of bar, which the reference points to.
        self.assertEqual(self.f2[foo_bar], self.f2['root/bar'])
California Institute of Technology. All rights reserved """ # from netCDF4 import Dataset from h5py import File, Dataset hinput = File( '/Users/greguska/gitprojeects/nexus/nexus-ingest/developer-box/data/smap/SMAP_L2B_SSS_00865_20150331T163144_R13080.h5', 'r') houput = File( '/Users/greguska/gitprojeects/nexus/nexus-ingest/developer-box/data/smap/SMAP_L2B_SSS_00865_20150331T163144_R13080.split.h5', 'w') for key in hinput.keys(): hinput.copy('/' + key, houput['/'], name=key) if houput[key].ndim == 2: houput[key + '_c'] = houput[key][0:76, 181:183] elif houput[key].ndim == 3: houput[key + '_c'] = houput[key][0:76, 181:183, :] elif houput[key].ndim == 1: houput[key + '_c'] = houput[key][181:183] houput[key + '_c'].attrs.update(houput[key].attrs) del houput[key] houput[key] = houput[key + '_c'] del houput[key + '_c'] print houput[key] houput.attrs.update(hinput.attrs)