def dump_cloudsat(filename):
    """
    Walk the hdf file and print out information about each vgroup
    and vdata object.

    Parameters
    ----------

    filename: str or Path object
        name of hdf file

    Returns
    -------

    None. Information is printed to stdout: one ``vgroup:`` line per vgroup
    reference, followed by whatever ``describevg`` emits for that vgroup.
    """
    # Accept Path objects as documented: the HDF / SD constructors are
    # always handed a plain string.
    filename = str(filename)

    hdf = HDF(filename)
    sd = vs = v = None
    try:
        # Initialize the SD, V and VS interfaces on the file.
        sd = SD(filename)
        vs = hdf.vstart()
        v = hdf.vgstart()

        # Scan all vgroups in the file. getid(-1) yields the first vgroup
        # reference; each later call yields the one after `ref`.
        ref = -1
        while True:
            try:
                ref = v.getid(ref)
                print('vgroup: ', ref)
            except HDF4Error:
                # no more vgroup
                break
            describevg(ref, v, vs, sd)
    finally:
        # Release every interface that was successfully opened, then the
        # file itself, even when describevg (or an open call) raised.
        if v is not None:
            v.end()
        if vs is not None:
            vs.end()
        if sd is not None:
            sd.end()
        hdf.close()

    return None
class HDF4(_geospatial):
    """
    HDF4 context manager class.

    ``__enter__`` opens the file together with its VS (vdata) and V (vgroup)
    interfaces and ``__exit__`` releases them.  ``get_geospatial``,
    ``get_temporal`` and ``get_properties`` read ``self.v`` / ``self.vs`` and
    therefore have to be called inside the ``with`` block.
    """
    hdf = None
    vs = None
    v = None

    # strptime formats tried, in order, when parsing "<date> <time>" strings
    # (two-digit year first, then four-digit year).
    _TIMESTAMP_FORMATS = ("%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S")

    def __init__(self, fname):
        """
        :param str fname: The path of the HDF4 file.
        """
        self.fname = str(fname)

    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()
        return self

    def __exit__(self, *args):
        """
        Close interfaces and HDF file after finishing use in context manager.
        """
        self.v.end()
        self.vs.end()
        self.hdf.close()

    @staticmethod
    def _read_first_fields(vs, name):
        """
        Read the vdata called ``name`` one record at a time and collect the
        first field of each record.

        :param vs: VS interface of the open HDF file
        :param str name: Name of the vdata to read
        :returns: List with the first field of every record.
        """
        vd = vs.attach(vs.find(name))
        values = []
        try:
            while True:
                try:
                    values.append(vd.read()[0][0])
                except HDF4Error:
                    # Reading past the last record raises: end of data.
                    break
        finally:
            # Always release the vdata, even if an unexpected error escapes.
            vd.detach()
        return values

    def _extract_from_vgroup(self, name, extractor):
        """
        Walk the vgroups of the file and, at the first one called ``name``,
        return ``extractor(self.vs, self.fname)``.

        :param str name: Name of the vgroup to look for
        :param extractor: Callable taking (vs, path) and returning metadata
        :returns: The extractor result, or None when no such vgroup exists
                  or an HDF4Error interrupts the extraction.
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)
            except HDF4Error:
                # getid raises once every vgroup has been visited.
                return None
            try:
                if vg._name == name:
                    return extractor(self.vs, self.fname)
            except HDF4Error:
                return None
            finally:
                vg.detach()

    def _get_coords(self, vs, fn):
        """
        Read the latitude / longitude vdatas and return them as lists of
        floats.

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file (unused)
        :returns: Dict containing geospatial information
                  (keys ``lat`` and ``lon``).
        """
        mappings = {
            "NVlat2": "lat",
            "NVlng2": "lon",
        }

        coords = {}
        # .items() rather than .iteritems(): the latter only exists on
        # Python 2 dictionaries.
        for vdata_name, label in mappings.items():
            # Stored values are divided by 10**7 to obtain the coordinate.
            coords[label] = [
                float(raw) / 10**7
                for raw in self._read_first_fields(vs, vdata_name)
            ]

        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file (unused)
        :returns: Dict containing temporal information.
        :raises ValueError: If the timestamps cannot be parsed.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for vdata_name, label in mappings.items():
            timestamps[label] = self._read_first_fields(vs, vdata_name)

        # HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Convert the non-zero character codes to chars and join them.
        timestamps["date"] = ''.join(
            chr(code) for code in timestamps["date"] if code != 0)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: Dict with a ``date`` string and ``start_time`` /
                             ``end_time`` sequences (first element is used)
        :returns: Dict containing start and end timestamps (ISO 8601 strings)
        :raises ValueError: If no known format matches both timestamps.
        """
        st_base = "%s %s" % (tm_dict["date"], tm_dict["start_time"][0])
        et_base = "%s %s" % (tm_dict["date"], tm_dict["end_time"][0])

        for t_format in self._TIMESTAMP_FORMATS:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue
            return {"start_time": start_time.isoformat(),
                    "end_time": end_time.isoformat()}

        # Previously this fell through to an UnboundLocalError; report the
        # offending values instead.
        raise ValueError(
            "Could not parse timestamps %r / %r with any of the formats %r"
            % (st_base, et_base, self._TIMESTAMP_FORMATS))

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information, or None.
        """
        geospatial = self._extract_from_vgroup("Navigation", self._get_coords)
        if geospatial is not None:
            geospatial["type"] = "track"  # Type annotation
        return geospatial

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: Dict containing temporal metadata, or None.
        """
        return self._extract_from_vgroup("Mission", self._get_temporal)

    def get_properties(self):
        """
        Returns ceda_di.metadata.properties.Properties object
        containing geospatial and temporal metadata from file.

        :returns: Metadata.product.Properties object
        """
        geospatial = self.get_geospatial()
        temporal = self.get_temporal()
        filesystem = super(HDF4, self).get_filesystem(self.fname)
        data_format = {
            "format": "HDF4",
        }

        # Instrument and flight details are derived from the file name.
        instrument = arsf.Hyperspectral.get_instrument(filesystem["filename"])
        flight_info = arsf.Hyperspectral.get_flight_info(
            filesystem["filename"])
        props = product.Properties(spatial=geospatial,
                                   temporal=temporal,
                                   filesystem=filesystem,
                                   data_format=data_format,
                                   instrument=instrument,
                                   flight_info=flight_info)

        return props
class HdfFile(GenericFile):
    """
    Metadata handler for HDF4 files.

    Level 1 and 2 requests return the generic level 1 metadata; level 3 adds
    temporal information read from the 'Mission' vgroup and a bounding-box
    envelope taken from the 'Navigation' vgroup or, failing that, from the
    'Latitude' / 'Longitude' SD datasets.
    """

    # Handles to the open HDF file and its VS / V interfaces. They are only
    # populated while get_metadata_badccsv_level3 is extracting metadata.
    hdf = None
    vs = None
    v = None

    # strptime formats tried, in order, when parsing "<date> <time>" strings
    # (two-digit year first, then four-digit year).
    _TIMESTAMP_FORMATS = ("%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S")

    def __init__(self, file_path, level, additional_param=None):
        """
        :param file_path: Path of the HDF file (forwarded to GenericFile)
        :param level: Metadata level (forwarded to GenericFile)
        :param additional_param: Accepted but not used by this handler
        """
        GenericFile.__init__(self, file_path, level)
        self.handler_id = "hdf2."
        self.FILE_FORMAT = "hdf2."

    def get_handler_id(self):
        """Return the identifier of this handler."""
        return self.handler_id

    def _close_interfaces(self):
        """Release the V / VS interfaces and the HDF file if they are open."""
        if self.v is not None:
            self.v.end()
        if self.vs is not None:
            self.vs.end()
        if self.hdf is not None:
            self.hdf.close()
        self.hdf = self.vs = self.v = None

    @staticmethod
    def _read_first_fields(vs, name):
        """
        Read the vdata called ``name`` one record at a time and collect the
        first field of each record.

        :param vs: VS interface of the open HDF file
        :param str name: Name of the vdata to read
        :returns: List with the first field of every record.
        """
        vd = vs.attach(vs.find(name))
        values = []
        try:
            while True:
                try:
                    values.append(vd.read()[0][0])
                except HDF4Error:
                    # Reading past the last record raises: end of data.
                    break
        finally:
            # Always release the vdata, even if an unexpected error escapes.
            vd.detach()
        return values

    def _extract_from_vgroup(self, name, extractor):
        """
        Walk the vgroups of the file and, at the first one called ``name``,
        return ``extractor(self.vs, self.file_path)``.

        :param str name: Name of the vgroup to look for
        :param extractor: Callable taking (vs, path) and returning metadata
        :returns: The extractor result, or None when no such vgroup exists
                  or an HDF4Error interrupts the extraction.
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)
            except HDF4Error:
                # getid raises once every vgroup has been visited.
                return None
            try:
                if vg._name == name:
                    return extractor(self.vs, self.file_path)
            except HDF4Error:
                return None
            finally:
                vg.detach()

    def _get_coords(self, vs, fn):
        """
        Read the latitude / longitude vdatas and return them as lists of
        floats.

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file (unused)
        :returns: Dict containing geospatial information
                  (keys ``Latitude`` and ``Longitude``).
        """
        mappings = {
            "NVlat2": "Latitude",
            "NVlng2": "Longitude",
        }

        coords = {}
        # .items() rather than .iteritems(): the latter only exists on
        # Python 2 dictionaries.
        for vdata_name, label in mappings.items():
            # Stored values are divided by 10**7 to obtain the coordinate.
            coords[label] = [
                float(raw) / 10**7
                for raw in self._read_first_fields(vs, vdata_name)
            ]

        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file (unused)
        :returns: Dict containing temporal information.
        :raises ValueError: If the timestamps cannot be parsed.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for vdata_name, label in mappings.items():
            timestamps[label] = self._read_first_fields(vs, vdata_name)

        # HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Convert the non-zero character codes to chars and join them.
        timestamps["date"] = ''.join(
            chr(code) for code in timestamps["date"] if code != 0)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: Dict with a ``date`` string and ``start_time`` /
                             ``end_time`` sequences (first element is used)
        :returns: Dict containing start and end timestamps (ISO 8601 strings)
        :raises ValueError: If no known format matches both timestamps.
        """
        st_base = "%s %s" % (tm_dict["date"], tm_dict["start_time"][0])
        et_base = "%s %s" % (tm_dict["date"], tm_dict["end_time"][0])

        for t_format in self._TIMESTAMP_FORMATS:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue
            return {"start_time": start_time.isoformat(),
                    "end_time": end_time.isoformat()}

        # Previously this fell through to an UnboundLocalError; report the
        # offending values instead.
        raise ValueError(
            "Could not parse timestamps %r / %r with any of the formats %r"
            % (st_base, et_base, self._TIMESTAMP_FORMATS))

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information, or None.
        """
        geospatial = self._extract_from_vgroup("Navigation", self._get_coords)
        if geospatial is not None:
            geospatial["type"] = "track"  # Type annotation
        return geospatial

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: Dict containing temporal metadata, or None.
        """
        return self._extract_from_vgroup("Mission", self._get_temporal)

    def get_phenomena(self, fp):
        """Return the phenomena of the file; this handler reports none."""
        phen_list = []
        return phen_list

    def get_metadata_badccsv_level2(self):
        """Level 2 extraction is not implemented for this handler."""
        return None

    def get_geolocation(self):
        """
        Read the 'Latitude' and 'Longitude' SD datasets of the file.

        Both datasets are indexed as two-dimensional and flattened.

        :returns: Tuple (latitude, longitude) of flattened arrays, or None
                  if an HDF4Error occurs while reading them.
        """
        # Open file.
        hdf = SD(self.file_path, SDC.READ)
        try:
            # Read geolocation datasets.
            latitude = hdf.select('Latitude')[:, :].flatten()
            longitude = hdf.select('Longitude')[:, :].flatten()
            return (latitude, longitude)
        except HDF4Error:
            return None
        finally:
            # The data has been copied out by now; release the SD interface.
            hdf.end()

    def normalize_coord(self, coord):
        """Return ``coord``, with anything below -180 replaced by 0."""
        if coord < -180:
            coord = 0
        return coord

    def _envelope(self, latitudes, longitudes):
        """
        Build the bounding-box envelope of the given coordinates.

        :param latitudes: Sequence of latitude values
        :param longitudes: Sequence of longitude values
        :returns: Envelope dict with [lon, lat] lower and upper corners
                  rounded to 3 decimals, or None if either sequence is empty.
        """
        # max() / min() raise on empty input; report "no spatial info".
        if len(latitudes) == 0 or len(longitudes) == 0:
            return None

        lat_u = self.normalize_coord(float(max(latitudes)))
        lat_l = self.normalize_coord(float(min(latitudes)))
        lon_u = self.normalize_coord(float(max(longitudes)))
        lon_l = self.normalize_coord(float(min(longitudes)))

        return {"coordinates": {"type": "envelope",
                                "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                [round(lon_u, 3), round(lat_u, 3)]]
                                }
                }

    def get_metadata_badccsv_level3(self):
        """
        Extract level 3 metadata: the generic level 1 information plus
        temporal and spatial information read from the HDF file.

        :returns: The generic level 1 result extended with (None, spatial),
                  where spatial is an envelope dict or None.
        """
        self.handler_id = "Hdf handler level 3."
        spatial = None

        file_info = self.get_metadata_generic_level1()

        # First method for extracting information: vgroups / vdatas.
        self.hdf = HDF(self.file_path)
        try:
            self.vs = self.hdf.vstart()
            self.v = self.hdf.vgstart()

            geospatial = self.get_geospatial()
            temporal = self.get_temporal()
        finally:
            # Do not leave the file and its interfaces open once the
            # vgroups have been read (or reading has failed).
            self._close_interfaces()

        if geospatial is not None:
            spatial = self._envelope(geospatial["Latitude"],
                                     geospatial["Longitude"])
        else:
            # Second method: SD geolocation datasets.
            geolocation = self.get_geolocation()
            if geolocation is not None:
                spatial = self._envelope(geolocation[0], geolocation[1])

        if temporal is not None:
            file_info[0]["info"]["temporal"] = {"start_time": temporal["start_time"],
                                                "end_time": temporal["end_time"]}

        return file_info + (None, spatial, )

    def get_metadata(self):
        """
        Return the metadata for the configured level ("1", "2" or "3") with
        the file format recorded under ``res[0]["info"]["format"]``.

        :raises ValueError: If the level is not one of "1", "2" or "3".
        """
        if self.level in ("1", "2"):
            res = self.get_metadata_generic_level1()
        elif self.level == "3":
            res = self.get_metadata_badccsv_level3()
        else:
            # Previously an unknown level ended in an UnboundLocalError.
            raise ValueError("Unsupported metadata level: %r" % (self.level,))

        res[0]["info"]["format"] = self.FILE_FORMAT

        return res

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Safety net: a no-op unless interfaces were somehow left open.
        self._close_interfaces()
def HDFread(filename, variable, vgroup=None):
    """
    Extract the data for non-scientific data in V mode of hdf file

    Parameters
    ----------

    filename: str or Path object
        name of hdf file
    variable: str
        name of the vdata to read
    vgroup: str, optional
        name of the vgroup holding the vdata
        (default 'Geolocation Fields')

    Returns
    -------

    numpy array built from all records of the requested vdata

    Raises
    ------

    KeyError
        if the file has no vgroup called ``vgroup``
    ValueError
        if ``vgroup`` has no vdata called ``variable``
    """
    if vgroup is None:
        vgroup = 'Geolocation Fields'

    filename = str(filename)
    hdf = HDF(filename, HC.READ)
    vs = v = None
    try:
        # Initialize the V and VS interfaces on the file.
        vs = hdf.vstart()
        v = hdf.vgstart()

        # Map every vgroup name to its reference number.
        vgroup_refs = {}
        ref = -1
        while True:
            try:
                ref = v.getid(ref)
            except HDF4Error:
                # no more vgroup
                break
            vg = v.attach(ref)
            try:
                vgroup_refs[vg._name] = vg._refnum
            finally:
                vg.detach()

        if vgroup not in vgroup_refs:
            raise KeyError(
                f'vgroup {vgroup} not found; available: {sorted(vgroup_refs)}')

        # List the (tag, ref) pairs of all members of the requested vgroup.
        vg = v.attach(vgroup_refs[vgroup])
        try:
            members = vg.tagrefs()
        finally:
            vg.detach()

        # Keep name, ref and record count together for vdata members only.
        # Indexing `members` with a position from the vdata-only name list
        # picks the wrong object whenever a non-vdata member precedes it.
        vdatas = []
        for tag, member_ref in members:
            if tag == HC.DFTAG_VH:
                vd = vs.attach(member_ref)
                try:
                    nrec, _intmode, _fields, _size, name = vd.inquire()
                finally:
                    vd.detach()
                vdatas.append((name, member_ref, nrec))

        for name, member_ref, nrec in vdatas:
            if name == variable:
                break
        else:
            names = [entry[0] for entry in vdatas]
            error = f'{variable} is not in {names} for vgroup {vgroup}'
            raise ValueError(error)

        var = vs.attach(member_ref)
        try:
            records = var.read(nrec)
        finally:
            var.detach()
    finally:
        # Terminate V and VS interfaces and close the HDF file, also when
        # the lookup above failed.
        if v is not None:
            v.end()
        if vs is not None:
            vs.end()
        hdf.close()

    return np.asarray(records)
class HDF4(_geospatial):
    """
    HDF4 context manager class.

    ``__enter__`` opens the file together with its VS (vdata) and V (vgroup)
    interfaces and ``__exit__`` releases them.  ``get_geospatial``,
    ``get_temporal`` and ``get_properties`` read ``self.v`` / ``self.vs`` and
    therefore have to be called inside the ``with`` block.
    """
    hdf = None
    vs = None
    v = None

    # strptime formats tried, in order, when parsing "<date> <time>" strings
    # (two-digit year first, then four-digit year).
    _TIMESTAMP_FORMATS = ("%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S")

    def __init__(self, fname):
        """
        :param str fname: The path of the HDF4 file.
        """
        self.fname = str(fname)

    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()
        return self

    def __exit__(self, *args):
        """
        Close interfaces and HDF file after finishing use in context manager.
        """
        self.v.end()
        self.vs.end()
        self.hdf.close()

    @staticmethod
    def _read_first_fields(vs, name):
        """
        Read the vdata called ``name`` one record at a time and collect the
        first field of each record.

        :param vs: VS interface of the open HDF file
        :param str name: Name of the vdata to read
        :returns: List with the first field of every record.
        """
        vd = vs.attach(vs.find(name))
        values = []
        try:
            while True:
                try:
                    values.append(vd.read()[0][0])
                except HDF4Error:
                    # Reading past the last record raises: end of data.
                    break
        finally:
            # Always release the vdata, even if an unexpected error escapes.
            vd.detach()
        return values

    def _extract_from_vgroup(self, name, extractor):
        """
        Walk the vgroups of the file and, at the first one called ``name``,
        return ``extractor(self.vs, self.fname)``.

        :param str name: Name of the vgroup to look for
        :param extractor: Callable taking (vs, path) and returning metadata
        :returns: The extractor result, or None when no such vgroup exists
                  or an HDF4Error interrupts the extraction.
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)
            except HDF4Error:
                # getid raises once every vgroup has been visited.
                return None
            try:
                if vg._name == name:
                    return extractor(self.vs, self.fname)
            except HDF4Error:
                return None
            finally:
                vg.detach()

    def _get_coords(self, vs, fn):
        """
        Read the latitude / longitude vdatas and return them as lists of
        floats.

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file (unused)
        :returns: Dict containing geospatial information
                  (keys ``lat`` and ``lon``).
        """
        mappings = {
            "NVlat2": "lat",
            "NVlng2": "lon",
        }

        coords = {}
        # .items() rather than .iteritems(): the latter only exists on
        # Python 2 dictionaries.
        for vdata_name, label in mappings.items():
            # Stored values are divided by 10**7 to obtain the coordinate.
            coords[label] = [
                float(raw) / 10**7
                for raw in self._read_first_fields(vs, vdata_name)
            ]

        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file (unused)
        :returns: Dict containing temporal information.
        :raises ValueError: If the timestamps cannot be parsed.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for vdata_name, label in mappings.items():
            timestamps[label] = self._read_first_fields(vs, vdata_name)

        # HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Convert the non-zero character codes to chars and join them.
        timestamps["date"] = ''.join(
            chr(code) for code in timestamps["date"] if code != 0)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: Dict with a ``date`` string and ``start_time`` /
                             ``end_time`` sequences (first element is used)
        :returns: Dict containing start and end timestamps (ISO 8601 strings)
        :raises ValueError: If no known format matches both timestamps.
        """
        st_base = "%s %s" % (tm_dict["date"], tm_dict["start_time"][0])
        et_base = "%s %s" % (tm_dict["date"], tm_dict["end_time"][0])

        for t_format in self._TIMESTAMP_FORMATS:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
            except ValueError:
                # ValueError will be raised if strptime format doesn't match
                # the actual timestamp - so just try the next strptime format
                continue
            return {
                "start_time": start_time.isoformat(),
                "end_time": end_time.isoformat()
            }

        # Previously this fell through to an UnboundLocalError; report the
        # offending values instead.
        raise ValueError(
            "Could not parse timestamps %r / %r with any of the formats %r"
            % (st_base, et_base, self._TIMESTAMP_FORMATS))

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information, or None.
        """
        geospatial = self._extract_from_vgroup("Navigation", self._get_coords)
        if geospatial is not None:
            geospatial["type"] = "track"  # Type annotation
        return geospatial

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: Dict containing temporal metadata, or None.
        """
        return self._extract_from_vgroup("Mission", self._get_temporal)

    def get_properties(self):
        """
        Returns ceda_di.metadata.properties.Properties object
        containing geospatial and temporal metadata from file.

        :returns: Metadata.product.Properties object
        """
        geospatial = self.get_geospatial()
        temporal = self.get_temporal()
        filesystem = super(HDF4, self).get_filesystem(self.fname)
        data_format = {
            "format": "HDF4",
        }

        # Instrument and flight details are derived from the file name.
        instrument = arsf.Hyperspectral.get_instrument(filesystem["filename"])
        flight_info = arsf.Hyperspectral.get_flight_info(
            filesystem["filename"])
        props = product.Properties(spatial=geospatial,
                                   temporal=temporal,
                                   filesystem=filesystem,
                                   data_format=data_format,
                                   instrument=instrument,
                                   flight_info=flight_info)

        return props