def test_read_latest():
    """Read attributes and datasets from a latest-superblock-version file.

    Walks root -> group -> sub-group, checking values and dtypes of the
    attributes and datasets created by the fixture generator.
    """
    with pyfive.File(LATEST_HDF5_FILE) as hfile:
        # root
        assert hfile.attrs['attr1'] == -123
        assert hfile.attrs['attr1'].dtype == np.dtype('int32')
        dset1 = hfile['dataset1']
        assert_array_equal(dset1[:], np.arange(4))
        assert dset1.dtype == np.dtype('<i4')
        assert dset1.attrs['attr2'] == 130
        assert dset1.attrs['attr2'].dtype == np.dtype('uint8')

        # group
        grp = hfile['/group1']
        assert_almost_equal(grp.attrs['attr3'], 12.34, 2)
        assert grp.attrs['attr3'].dtype == np.dtype('float32')
        dset2 = grp['dataset2']
        assert_array_equal(dset2[:], np.arange(4))
        assert dset2.dtype == np.dtype('>u8')
        assert dset2.attrs['attr4'] == b'Hi'
        assert dset2.attrs['attr4'].dtype == np.dtype('|S2')

        # sub-group
        subgroup = grp['subgroup1']
        assert subgroup.attrs['attr5'] == b'Test'
        assert isinstance(subgroup.attrs['attr5'], bytes)
        dset3 = subgroup['dataset3']
        # BUG FIX: this data check originally re-read dset2 instead of dset3
        assert_array_equal(dset3[:], np.arange(4))
        assert dset3.dtype == np.dtype('<f4')
        assert dset3.attrs['attr6'] == u'Test' + b'\xc2\xa7'.decode('utf-8')
        assert isinstance(dset3.attrs['attr6'], string_type)
def test_reference_vlen_attr():
    """Object references stored in a vlen attribute dereference correctly."""
    with pyfive.File(REFERENCES_HDF5_FILE) as hfile:
        refs = hfile.attrs['vlen_refs']
        ref_to_root = refs[0][0]
        ref_to_dset = refs[1][0]
        ref_to_group = refs[1][1]

        # root reference resolves to the file's root group
        root = hfile[ref_to_root]
        assert root.attrs['root_attr'] == 123
        assert root.name == '/'
        assert root.parent.name == '/'

        # dataset reference resolves to /dataset1
        referenced_dset = hfile[ref_to_dset]
        assert_array_equal(referenced_dset[:], [0, 1, 2, 3])
        assert referenced_dset.attrs['dset_attr'] == 456
        assert referenced_dset.name == '/dataset1'
        assert referenced_dset.parent.name == '/'

        # group reference resolves to /group1
        referenced_grp = hfile[ref_to_group]
        assert referenced_grp.attrs['group_attr'] == 789
        assert referenced_grp.name == '/group1'
        assert referenced_grp.parent.name == '/'
def test_unsigned_int_dataset_datatypes():
    """Check data and dtype of unsigned-int datasets of all widths/orders.

    Uses a context manager (like the sibling tests) so the file is closed
    even when an assertion fails, and a loop instead of 16 copied asserts.
    """
    with pyfive.File(DATASET_DATATYPES_HDF5_FILE) as hfile:
        ref_data = np.arange(4)
        for byte_order, suffix in (('<', 'little'), ('>', 'big')):
            for nbytes in (1, 2, 4, 8):
                dset = hfile['uint%02d_%s' % (nbytes * 8, suffix)]
                assert_array_equal(dset[:], ref_data)
                assert dset.dtype == np.dtype('%su%d' % (byte_order, nbytes))
def test_signed_int_dataset_datatypes():
    """Check data and dtype of signed-int datasets of all widths/orders."""
    with pyfive.File(DATASET_DATATYPES_HDF5_FILE) as hfile:
        expected = -np.arange(4)
        for byte_order, suffix in (('<', 'little'), ('>', 'big')):
            for nbytes in (1, 2, 4, 8):
                dset = hfile['int%02d_%s' % (nbytes * 8, suffix)]
                assert_array_equal(dset[:], expected)
                assert dset.dtype == np.dtype('%si%d' % (byte_order, nbytes))
def test_numeric_scalar_attr_datatypes():
    """Scalar numeric attributes of every width and byte order.

    The unsigned expected values are 2**(bits-1) + 2, i.e. values that
    could not be stored in the signed type of the same size.
    """
    expected = {
        'int08': -123, 'int16': -123, 'int32': -123, 'int64': -123,
        'uint08': 130, 'uint16': 32770,
        'uint32': 2147483650, 'uint64': 9223372036854775810,
        'float32': 123., 'float64': 123.,
    }
    with pyfive.File(ATTR_DATATYPES_HDF5_FILE) as hfile:
        for base, value in expected.items():
            for suffix in ('little', 'big'):
                assert hfile.attrs['%s_%s' % (base, suffix)] == value
def test_compressed_dataset():
    """Check data and filter metadata of compressed/shuffled datasets."""
    # context manager so the file is closed even when an assert fails
    with pyfive.File(DATASET_COMPRESSED_HDF5_FILE) as hfile:
        expected = np.arange(21 * 16).reshape((21, 16))

        # gzip, no shuffle
        dset1 = hfile['dataset1']
        assert dset1.shape == (21, 16)
        assert dset1.dtype == np.dtype('u2')
        assert dset1.compression == 'gzip'
        assert dset1.compression_opts == 4
        assert dset1.shuffle is False
        assert_array_equal(dset1[:], expected)

        # gzip + shuffle
        dset2 = hfile['dataset2']
        assert dset2.shape == (21, 16)
        assert dset2.dtype == np.dtype('i4')
        assert dset2.compression == 'gzip'
        assert dset2.compression_opts == 4
        assert dset2.shuffle is True
        assert_array_equal(dset2[:], expected)

        # shuffle only, no compression
        dset3 = hfile['dataset3']
        assert dset3.shape == (21, 16)
        assert dset3.dtype == np.dtype('f8')
        assert dset3.compression is None
        assert dset3.compression_opts is None
        assert dset3.shuffle is True
        # BUG FIX: this data check originally re-read dset2 instead of dset3
        assert_array_equal(dset3[:], expected)
def test_chunked_reference_dataset():
    """Object references stored in a chunked dataset dereference correctly."""
    with pyfive.File(REFERENCES_HDF5_FILE) as hfile:
        refs = hfile['chunked_ref_dataset']
        to_root = refs[0]
        to_dset = refs[1]
        to_group = refs[2]
        to_null = refs[3]

        # each valid reference resolves to the expected object
        assert hfile[to_root].attrs['root_attr'] == 123
        resolved_dset = hfile[to_dset]
        assert_array_equal(resolved_dset[:], [0, 1, 2, 3])
        assert resolved_dset.attrs['dset_attr'] == 456
        assert hfile[to_group].attrs['group_attr'] == 789

        # a null reference cannot be dereferenced
        with assert_raises(ValueError):
            hfile[to_null]

        # valid references are truthy, the null reference is falsy
        assert bool(to_root)
        assert bool(to_dset)
        assert bool(to_group)
        assert not bool(to_null)
def test_dim_scales():
    """Dimension scales attach the expected datasets to each axis."""
    with pyfive.File(DIM_SCALES_HDF5_FILE) as hfile:
        # dataset with dimension scales attached
        scaled = hfile['dset1'].dims
        assert len(scaled) == 3
        assert (len(scaled[0]), len(scaled[1]), len(scaled[2])) == (1, 1, 2)
        assert scaled[0][0].name == '/z1'
        assert scaled[1][0].name == '/y1'
        assert scaled[2][0].name == '/x1'
        assert scaled[2][1].name == '/x2'
        assert_array_equal(scaled[0][0][:], [0, 10, 20, 30])
        assert_array_equal(scaled[1][0][:], [3, 4, 5])
        assert_array_equal(scaled[2][0][:], [1, 2])
        assert_array_equal(scaled[2][1][:], [99, 98])

        # dataset without any dimension scales
        unscaled = hfile['dset2'].dims
        assert len(unscaled) == 3
        for axis in range(3):
            assert len(unscaled[axis]) == 0
def test_dataset_class():
    """Exercise the public Dataset API on two simple contiguous datasets."""
    with pyfive.File(EARLIEST_HDF5_FILE) as hfile:
        dset1 = hfile['dataset1']
        dset2 = hfile['group1']['dataset2']

        # properties shared by both datasets
        for dset in (dset1, dset2):
            assert_array_equal(dset[:], np.arange(4))
            assert_array_equal(dset.value, np.arange(4))
            assert dset.len() == 4
            assert dset.shape == (4, )
            assert dset.ndim == 1
            assert dset.size == 4
            # contiguous storage: no chunking and no filters
            assert dset.chunks is None
            assert dset.compression is None
            assert dset.compression_opts is None
            assert dset.scaleoffset is None
            assert dset.shuffle is False
            assert dset.fletcher32 is False
            assert isinstance(dset.attrs, dict)
            assert dset.file is hfile

        # properties that differ between the two
        assert dset1.dtype == np.dtype('<i4')
        assert dset2.dtype == np.dtype('>u8')
        assert dset1.attrs['attr2'] == 130
        assert dset2.attrs['attr4'] == b'Hi'
        assert dset1.name == '/dataset1'
        assert dset2.name == '/group1/dataset2'
        assert dset1.parent.name == '/'
        assert dset2.parent.name == '/group1'
def test_chunked_dataset():
    """Data stored in a chunked layout reads back identically."""
    with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile:
        chunked = hfile['dataset1']
        assert chunked.chunks == (2, 2)
        assert_array_equal(chunked[:], np.arange(21 * 16).reshape((21, 16)))
def test_file_class():
    """File objects expose filename, mode and userblock size."""
    # use a context manager (like the sibling tests) so the file is closed
    # even when an assertion fails
    with pyfive.File(EARLIEST_HDF5_FILE) as hfile:
        assert hfile.filename == EARLIEST_HDF5_FILE
        assert hfile.mode == 'r'
        assert hfile.userblock_size == 0
def test_chunked_dataset_contents():
    """Data stored in a chunked layout reads back identically.

    BUG FIX: renamed from ``test_chunked_dataset`` -- a duplicate definition
    with that name shadowed the earlier, more complete test (which also
    checks ``chunks``), so only one of the two was ever collected.
    """
    with pyfive.File(DATASET_CHUNKED_HDF5_FILE) as hfile:
        dset1 = hfile['dataset1']
        assert_array_equal(dset1[:], np.arange(21 * 16).reshape((21, 16)))
def test_complex_scalar_attr_datatypes():
    """Complex scalar attributes of both widths and byte orders."""
    with pyfive.File(ATTR_DATATYPES_HDF5_FILE) as hfile:
        for attr_name in ('complex64_little', 'complex128_little',
                          'complex64_big', 'complex128_big'):
            assert hfile.attrs[attr_name] == (123 + 456j)
def test_group_class():
    """Exercise the Group mapping interface and its attributes."""
    # context manager so the file is closed even when an assert fails
    with pyfive.File(EARLIEST_HDF5_FILE) as hfile:
        grp = hfile['group1']
        subgrp = grp['subgroup1']

        ################
        # class methods
        ################
        # __iter__()
        count = 0
        for _ in grp:
            count += 1
        assert count == 2

        # __contains__()
        assert 'dataset2' in grp
        assert 'subgroup1' in grp
        assert 'foobar' not in grp

        # __getitem__()
        assert grp['subgroup1'].name == '/group1/subgroup1'
        assert_raises(KeyError, grp.__getitem__, 'foobar')

        # keys()
        assert 'dataset2' in grp.keys()
        assert 'subgroup1' in grp.keys()
        assert 'foobar' not in grp.keys()

        # values() / items()
        assert len(grp.values()) == 2
        assert len(grp.items()) == 2

        # get()
        assert grp.get('subgroup1').name == '/group1/subgroup1'
        assert grp.get('foobar') is None

        ####################
        # class attributes
        ####################
        attrs = grp.attrs
        assert isinstance(attrs, dict)
        assert_almost_equal(attrs['attr3'], 12.34, 2)
        assert attrs['attr3'].dtype == np.dtype('float32')

        assert grp.name == '/group1'
        assert grp.file is hfile
        assert grp.parent is hfile

        assert subgrp.name == '/group1/subgroup1'
        # BUG FIX: this line originally re-tested grp.file instead of
        # subgrp.file
        assert subgrp.file is hfile
        assert subgrp.parent is grp
def test_astype():
    """astype() context manager changes the dtype of data read back."""
    with pyfive.File(EARLIEST_HDF5_FILE) as hfile:
        dset1 = hfile['dataset1']
        # native dtype before any conversion
        assert dset1.dtype == np.dtype('<i4')
        for target in ('i2', 'f8'):
            with dset1.astype(target):
                assert dset1[:].dtype == np.dtype(target)
def test_string_scalar_attr_datatypes():
    """Fixed- and variable-length string scalar attributes."""
    with pyfive.File(ATTR_DATATYPES_HDF5_FILE) as hfile:
        attrs = hfile.attrs
        assert attrs['string_one'] == b'H'
        assert attrs['string_two'] == b'Hi'
        assert attrs['vlen_string'] == b'Hello'
        # vlen unicode decodes to text, including the section-sign character
        assert attrs['vlen_unicode'] == (
            u'Hello' + b'\xc2\xa7'.decode('utf-8'))
def test_visititems_method():
    """visititems() walks (name, object) pairs and honours early return."""
    with pyfive.File(GROUPS_HDF5_FILE) as hfile:
        # a callable that always returns None -> full walk, None result
        assert hfile.visititems(lambda x, y: print(x, y.name)) is None

        target = 'group2/subgroup1'
        found = hfile.visititems(lambda x, y: x if x == target else None)
        assert found == target

        # visited names are relative: a leading / never matches
        rooted = '/group2/subgroup1'
        assert hfile.visititems(
            lambda x, y: x if x == rooted else None) is None

        # ...but the visited object's .name is absolute
        obj = hfile.visititems(lambda x, y: y if y.name == rooted else None)
        assert obj.name == rooted
def test_compressed_v1_dataset():
    """gzip-compressed dataset stored with version-1 B-trees reads back."""
    with pyfive.File(DATASET_COMPRESSED_HDF5_FILE) as hfile:
        temperature = hfile['temperature']
        # filter metadata
        assert temperature.compression == 'gzip'
        assert temperature.compression_opts == 4
        assert temperature.shuffle is False
        # shape/dtype and a spot check of the first and last values
        assert temperature.shape == (816852,)
        assert temperature.dtype == np.dtype('>f4')
        assert temperature[0] == 73.15625
        assert temperature[-1] == 85.71875
def headerInterpreter(self): """Try to interpret the header, and return a dictionary with the header information""" #print self.type interpreterClass = importName("XIO.plugins.%s_interpreter" % \ self.type, "Interpreter") if not interpreterClass: interpreterClass = importName("plugins.%s_interpreter" % \ self.type, "Interpreter") if self.type == "hdf5dec": import pyfive try: self.rawHead = pyfive.File(self.fileName) except ImportError: print "\nThe master_file could not be interpreted." raise SystemExit if not interpreterClass: raise XIOError, "Can't import %s interperter" % (self.type) # Rules are serial number (or other identifier) based rules # To be added # Special = interpreter.SpecialRules # self.interpreter = interpreterClass() self.RawHeadDict = self.interpreter.getRawHeadDict(self.rawHead) #VERBOSE = True # Default value self.header['SensorThickness'] = 0.0 for k in self.interpreter.HTD.keys(): args, func = self.interpreter.HTD[k] #self.header[k] = apply(func, map(self.RawHeadDict.get,args)) if args[0] in self.RawHeadDict: try: self.header[k] = func(*map(self.RawHeadDict.get, args)) except ValueError: self.header[k] = 0. if VERBOSE: print "WARNING: Can't interpret header KEY %s" % k # Check consistancy of beam center coordinates (should be in mm). # with pixel size and number... # Some time the beam center is expressed in pixels rather than in mm. if (self.header["BeamX"] > self.header["Width"]*self.header["PixelX"])\ and \ (self.header["BeamX"] > self.header["Width"]*self.header["PixelX"]): self.header["BeamX"] = self.header["BeamX"] * self.header["PixelX"] self.header["BeamY"] = self.header["BeamY"] * self.header["PixelY"] self.header["ImageType"] = self.type return self.header
def test_dim_labels():
    """Dimension labels are exposed via dims[i].label."""
    with pyfive.File(DIM_SCALES_HDF5_FILE) as hfile:
        # dataset with dimension labels
        labelled = hfile['dset1'].dims
        for axis, label in enumerate(('z', 'y', 'x')):
            assert labelled[axis].label == label

        # dataset without labels reports empty strings
        unlabelled = hfile['dset2'].dims
        for axis in range(3):
            assert unlabelled[axis].label == ''
def test_multidim_datasets():
    """Datasets of rank 1 to 4 read back with correct shape and contents."""
    shapes = {'a': (2, ), 'b': (2, 3), 'c': (2, 3, 4), 'd': (2, 3, 4, 5)}
    with pyfive.File(DATASET_MULTIDIM_HDF5_FILE) as hfile:
        for name in 'abcd':
            data = hfile[name][:]
            shape = shapes[name]
            assert data.shape == shape
            # each dataset holds 0..size-1 in row-major order
            assert_array_equal(
                data, np.arange(np.prod(shape)).reshape(shape))
def test_get_objects_by_path():
    """Objects are reachable by absolute and nested paths (gh-15)."""
    with pyfive.File(EARLIEST_HDF5_FILE) as hfile:
        grp = hfile['/group1']
        # absolute paths work from the file and from any group
        assert hfile['/group1/subgroup1'].name == '/group1/subgroup1'
        assert grp['/group1/subgroup1'].name == '/group1/subgroup1'
        # a trailing slash is tolerated
        assert hfile['group1/dataset2/'].name == '/group1/dataset2'
        # missing path components raise KeyError
        for bad_path in ('group1/fake', 'group1/subgroup1/fake',
                         'group1/dataset2/fake'):
            assert_raises(KeyError, hfile.__getitem__, bad_path)
def test_resizable_dataset():
    """Resizable datasets read back fully with the expected dtypes."""
    cases = (
        ('dataset1', (4, 6), '<f8'),
        ('dataset2', (10, 5), '<i4'),
        ('dataset3', (8, 4), '>i2'),
    )
    with pyfive.File(DATASET_RESIZABLE_HDF5_FILE) as hfile:
        for name, shape, dtype in cases:
            dset = hfile[name]
            assert_array_equal(
                dset[:], np.arange(shape[0] * shape[1]).reshape(shape))
            assert dset.dtype == dtype
def get_params(hdf5_file): extracted = {} h5cont = pyfive.File(hdf5_file) #dec.DImageSeries(hdf5_file) #neXus_tree = h5cont.neXus() #neXus_root = neXus_tree.root() #neXus_string_tree = iterate_children(neXus_root) if (len(sys.argv) == 2): print "Extracting metadata from " + hdf5_file print "Please modify XDS.INP if these numbers are incorrect.\n" for i in parameters: try: extracted[i] = str(h5cont[i].value) except: extracted[i] = "" return extracted
def test_fletcher32_datasets():
    """Datasets using the Fletcher-32 checksum filter read back correctly."""
    with pyfive.File(DATASET_FLETCHER_HDF5_FILE) as hfile:
        # 2-D chunked dataset
        checksummed = hfile['dataset1']
        assert_array_equal(checksummed[:], np.arange(4 * 4).reshape((4, 4)))
        assert checksummed.chunks == (2, 2)

        # 1-D chunked dataset
        other = hfile['dataset2']
        assert_array_equal(other[:], np.arange(3))
        assert other.chunks == (3, )

        # the filter is reported via the fletcher32 property
        assert checksummed.fletcher32
def test_numeric_array_attr_datatypes():
    """Numeric and string array attributes keep their values and dtypes."""
    with pyfive.File(ATTR_DATATYPES_HDF5_FILE) as hfile:
        attrs = hfile.attrs
        assert_array_equal(attrs['int32_array'], [-123, 45])
        assert_array_equal(attrs['uint64_array'], [12, 34])
        assert_array_equal(attrs['float32_array'], [123, 456])
        assert attrs['int32_array'].dtype == np.dtype('<i4')
        assert attrs['uint64_array'].dtype == np.dtype('>u8')
        assert attrs['float32_array'].dtype == np.dtype('<f4')
        # vlen strings come back as a fixed-width byte-string array
        assert attrs['vlen_str_array'][0] == b'Hello'
        assert attrs['vlen_str_array'][1] == b'World!'
        assert attrs['vlen_str_array'].dtype == np.dtype('S6')
def test_read_direct():
    """read_direct() fills a preallocated array, honouring selections."""
    with pyfive.File(EARLIEST_HDF5_FILE) as hfile:
        dset1 = hfile['dataset1']

        # whole dataset into the whole destination array
        out = np.zeros(4)
        dset1.read_direct(out)
        assert_array_equal(out, [0, 1, 2, 3])

        # matching source and destination slices at the start
        out = np.zeros(4)
        dset1.read_direct(out, np.s_[:2], np.s_[:2])
        assert_array_equal(out, [0, 1, 0, 0])

        # shifted destination slice
        out = np.zeros(4)
        dset1.read_direct(out, np.s_[1:3], np.s_[2:])
        assert_array_equal(out, [0, 0, 1, 2])
def test_float_dataset_datatypes():
    """Check data and dtype of float datasets of both widths and orders."""
    with pyfive.File(DATASET_DATATYPES_HDF5_FILE) as hfile:
        expected = np.arange(4)
        for byte_order, suffix in (('<', 'little'), ('>', 'big')):
            for nbytes in (4, 8):
                dset = hfile['float%d_%s' % (nbytes * 8, suffix)]
                assert_array_equal(dset[:], expected)
                assert dset.dtype == np.dtype('%sf%d' % (byte_order, nbytes))
def test_visit_method():
    """visit() walks names relative to the group it is called on."""
    with pyfive.File(GROUPS_HDF5_FILE) as hfile:
        # a callable that always returns None -> full walk, None result
        assert hfile.visit(lambda x: print(x)) is None

        target = 'group2/subgroup1'
        assert hfile.visit(lambda x: x if x == target else None) == target
        # visited names are relative: a leading / never matches
        rooted = '/group2/subgroup1'
        assert hfile.visit(lambda x: x if x == rooted else None) is None

        # when rooted at group2, names drop the 'group2/' prefix
        group2 = hfile['group2']
        assert group2.visit(
            lambda x: x if x == 'subgroup1' else None) == 'subgroup1'
        assert group2.visit(
            lambda x: x if x == 'group2/subgroup1' else None) is None
def test_vlen_sequence_attr_datatypes():
    """Variable-length sequence attributes round-trip element by element."""
    expected = {
        'vlen_int32': ([-1, 2], [3, 4, 5]),
        'vlen_uint64': ([1, 2], [3, 4, 5], [42]),
        'vlen_float32': ([0], [1, 2, 3], [4, 5]),
    }
    with pyfive.File(ATTR_DATATYPES_HDF5_FILE) as hfile:
        for attr_name, sequences in expected.items():
            vlen_attr = hfile.attrs[attr_name]
            assert len(vlen_attr) == len(sequences)
            for actual, wanted in zip(vlen_attr, sequences):
                assert_array_equal(actual, wanted)