def _create_one_dimensional_coord_list(self, filenames, index_offset=1):
    """
    Create a set of coordinates appropriate for a one-dimensional (column integrated) variable

    :param filenames:
    :param int index_offset: For 5km products this will choose the coordinates which represent the start (0),
     middle (1) and end (2) of the 15 shots making up each column retrieval.
    :return:
    """
    import logging
    from cis.data_io import hdf as hdf
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    import cis.utils as utils
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # latitude
    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                           units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)

    return coords

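# A minimal sketch (not from the CIS source) of what the "[:, index_offset]"
# slicing above does: for 5km products each record carries three reference
# shots, and index_offset picks the start (0), middle (1) or end (2) one.
# Shapes and values here are purely illustrative.
import numpy as np

records = np.arange(12.0).reshape(4, 3)   # 4 retrievals x 3 reference shots
start = records[:, 0]    # index_offset = 0
middle = records[:, 1]   # index_offset = 1 (the default above)
end = records[:, 2]      # index_offset = 2
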
def test_that_can_get_metadata_for_known_variable():
    data_dict = hdf_sd.read(escape_colons(valid_hdf_sd_file))
    metadata = hdf_sd.get_metadata(data_dict['Latitude'])

    eq_(metadata._name, "Latitude")
    eq_(metadata.standard_name, "latitude")
    eq_(metadata.long_name, "Geodetic Latitude")
    eq_(metadata.shape, [203, 135])
    eq_(metadata.units, "Degrees_north")
    eq_(metadata.factor, 1.0)
    eq_(metadata.offset, 0.0)
    eq_(metadata.missing_value, -999.0)

    attr = metadata.misc
    eq_(len(attr), 5)
    eq_(attr['Parameter_Type'], "MODIS Input")
    eq_(attr['valid_range'], [-90.0, 90.0])

def test_that_can_get_metadata_for_known_variable():
    data_dict = hdf_sd.read(valid_hdf_sd_file)
    metadata = hdf_sd.get_metadata(data_dict['Latitude'])

    eq_(metadata._name, "Latitude")
    eq_(metadata.standard_name, "latitude")
    eq_(metadata.long_name, "Geodetic Latitude")
    eq_(metadata.shape, [203, 135])
    eq_(metadata.units, "Degrees_north")
    eq_(metadata.range, [-90.0, 90.0])
    eq_(metadata.factor, 1.0)
    eq_(metadata.offset, 0.0)
    eq_(metadata.missing_value, -999.0)

    attr = metadata.misc
    eq_(len(attr), 10)
    eq_(attr['_FillValue'], -999.0)
    eq_(attr['Parameter_Type'], "MODIS Input")

def __read_hdf4(filename, variables):
    """
    A wrapper method for reading raw data from hdf4 files. This returns a dictionary of io handles
    for each of the VD and SD data types.

    :param filename: A name of a file to read
    :param variables: List of variables to read from the files
    :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine
    # will at some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variables
        # AND this can cause a crash in some versions/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are
        # removed from it; this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable + " in file: " + filename)

    return sds_dict, vds_dict

def _read_hdf4(filename, variables):
    """
    A wrapper method for reading raw data from hdf4 files. This returns a dictionary of io handles
    for each of the VD and SD data types.

    :param filename: A name of a file to read
    :param variables: List of variables to read from the files
    :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine
    # will at some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variables
        # AND this can cause a crash in some versions/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are
        # removed from it; this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable + " in file: " + filename)

    return sds_dict, vds_dict

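# Hypothetical call site for the wrapper above (a sketch only: the granule
# path is a placeholder, and pyhdf plus the cis package must be importable).
# It mirrors the documented behaviour: IOError for unreadable files,
# InvalidVariableError when a variable is in neither the SD nor the VD set.
if __name__ == "__main__":
    try:
        sds, vds = _read_hdf4("example_granule.hdf", ["Latitude", "Longitude"])
        print("SD variables:", list(sds), "VD variables:", list(vds))
    except IOError as err:
        print("Could not read file:", err)
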
def _create_coord_list(self, filenames, index_offset=0):
    import logging
    from cis.data_io import hdf as hdf
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    import cis.utils as utils
    from cis.data_io.hdf_vd import get_data, VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out the size of the data arrays;
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_data *= 1000.0  # Convert to m
    len_x = alt_data.shape[0]

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
    len_y = lat_data.shape[0]

    new_shape = (len_x, len_y)

    # altitude
    alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0)
    alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape)
    alt_coord = Coord(alt_data, alt_metadata)

    # pressure
    if self.include_pressure:
        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
        # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
        if str(pres_metadata.units) == "hPA":
            pres_metadata.units = "hPa"
        pres_metadata.shape = new_shape
        pres_coord = Coord(pres_data, pres_metadata, 'P')

    # latitude
    lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_metadata.shape = new_shape
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)
    lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_metadata.shape = new_shape
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1)
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                           units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)
    coords.append(alt_coord)
    if self.include_pressure and (pres_data.shape == alt_data.shape):
        # For MODIS L1 this may not be true, so skip the air pressure reading. If required for MODIS L1 then
        # some kind of interpolation of the air pressure would be needed, as it is on a different (smaller) grid
        # than the Lidar_Data_Altitudes.
        coords.append(pres_coord)

    return coords

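# A rough numpy stand-in (an assumption, not the cis.utils implementation) for
# the utils.expand_1d_to_2d_array calls above: a 1-D coordinate is repeated
# along a new axis so that every coordinate shares the same 2-D shape.
import numpy as np

def expand_1d_to_2d(array_1d, length, axis=0):
    """Repeat a 1-D array 'length' times along the given new axis (illustrative)."""
    if axis == 0:
        return np.repeat(array_1d[np.newaxis, :], length, axis=0)
    return np.repeat(array_1d[:, np.newaxis], length, axis=1)

alt = np.array([1.0, 2.0, 3.0])                     # e.g. three altitude levels
assert expand_1d_to_2d(alt, 5, axis=0).shape == (5, 3)
assert expand_1d_to_2d(alt, 5, axis=1).shape == (3, 5)
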
def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit

    logging.debug("Creating data object for variable " + variable)

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out the size of the data arrays;
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_coord = AuxCoord(lat_data, standard_name='latitude')

    pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
    pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_coord = AuxCoord(lon_data, standard_name='longitude')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time',
                          units="seconds since 1993-01-01 00:00:00")
    time_coord.convert_units(cis_standard_time_unit)

    # retrieve the data and its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    if variable in MIXED_RESOLUTION_VARIABLES:
        logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                        "documentation for more details".format(variable))
        data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
    else:
        data = hdf.read_data(var, self._get_calipso_data)

    cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units),
                dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                aux_coords_and_dims=[(lat_coord, (0,)), (lon_coord, (0,)), (pres_coord, (0, 1))])
    gd = GriddedData.make_from_cube(cube)
    return gd

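# Sketch of the time-unit conversion performed on time_coord above, written
# against cf_units (the units library iris uses). The exact definition of
# cis_standard_time_unit is an assumption here -- check cis.time_util for the
# authoritative epoch.
from cf_units import Unit

tai_unit = Unit("seconds since 1993-01-01 00:00:00")
std_unit = Unit("days since 1600-01-01 00:00:00")  # assumed CIS standard unit
# 0 s after the 1993 epoch, re-expressed as days since the assumed 1600 epoch:
print(tai_unit.convert(0.0, std_unit))
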
def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # work out the size of the data arrays;
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

    lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units=cis_standard_time_unit)

        # retrieve the data and its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        if data.ndim == 2:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            cubes.append(new_cube)
        elif data.ndim == 3:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            # Then add the (extended) pressure coord so that it is explicitly a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
            cubes.append(new_cube)
        else:
            raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd

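# Minimal numpy illustration of the promotion steps above: new_axis gives the
# cube a length-one time dimension, and pres_coord[np.newaxis, ...] gives the
# per-file pressure field a matching leading time axis so it can be mapped to
# dims (0, 1, 2, 3). Shapes below are hypothetical.
import numpy as np

pres = np.zeros((180, 360, 40))        # (lat, lon, altitude) for one file
pres_with_time = pres[np.newaxis, ...]
assert pres_with_time.shape == (1, 180, 360, 40)
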
def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit

    logging.debug("Creating data object for variable " + variable)

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out the size of the data arrays;
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_coord = AuxCoord(lat_data, standard_name='latitude')

    pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
    pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_coord = AuxCoord(lon_data, standard_name='longitude')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time',
                          units="seconds since 1993-01-01 00:00:00")
    time_coord.convert_units(cis_standard_time_unit)

    # retrieve the data and its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    if variable in MIXED_RESOLUTION_VARIABLES:
        logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                        "documentation for more details".format(variable))
        data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
    else:
        data = hdf.read_data(var, self._get_calipso_data)

    cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units),
                dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                aux_coords_and_dims=[(lat_coord, (0,)), (lon_coord, (0,)), (pres_coord, (0, 1))])
    gd = GriddedData.make_from_cube(cube)
    return gd

def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # work out the size of the data arrays;
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

    lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units=cis_standard_time_unit)

        # retrieve the data and its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        if data.ndim == 2:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            cubes.append(new_cube)
        elif data.ndim == 3:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            # Then add the (extended) pressure coord so that it is explicitly a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
            cubes.append(new_cube)
        else:
            raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd

def test_that_can_get_data():
    data_dict = hdf_sd.read(valid_hdf_sd_file)
    data = hdf_sd.get_data(data_dict['Latitude'])
    eq_(data.shape, (203, 135))

def test_that_can_read_known_variables():
    data_dict = hdf_sd.read(valid_hdf_sd_file, ['Latitude', 'Longitude'])
    eq_(len(data_dict), 2)

def test_that_can_read_all_variables():
    data_dict = hdf_sd.read(valid_hdf_sd_file)
    eq_(len(data_dict), 67)

def test_that_can_get_data():
    data_dict = hdf_sd.read(escape_colons(valid_hdf_sd_file))
    data = hdf_sd.get_data(data_dict['Latitude'])
    eq_(data.shape, (203, 135))

def test_that_can_read_known_variables():
    data_dict = hdf_sd.read(escape_colons(valid_hdf_sd_file), ['Latitude', 'Longitude'])
    eq_(len(data_dict), 2)

def test_that_can_read_all_variables():
    data_dict = hdf_sd.read(escape_colons(valid_hdf_sd_file))
    eq_(len(data_dict), 67)