def _create_cube(self, filenames, variable):
    import numpy as np
    from cis.data_io.hdf import _read_hdf4
    from cis.data_io import hdf_vd
    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord, AuxCoord
    from cis.time_util import calculate_mid_time, cis_standard_time_unit
    from cis.data_io.hdf_sd import get_metadata
    from cf_units import Unit

    variables = ['XDim:GlobalGrid', 'YDim:GlobalGrid', variable]
    logging.info("Listing coordinates: " + str(variables))

    cube_list = CubeList()
    # Read each file individually, let Iris do the merging at the end.
    for f in filenames:
        sdata, vdata = _read_hdf4(f, variables)

        lat_points = np.linspace(-90., 90., hdf_vd.get_data(vdata['YDim:GlobalGrid']))
        lon_points = np.linspace(-180., 180., hdf_vd.get_data(vdata['XDim:GlobalGrid']))

        lat_coord = DimCoord(lat_points, standard_name='latitude', units='degrees')
        lon_coord = DimCoord(lon_points, standard_name='longitude', units='degrees')

        # Create the time coordinate using the midpoint of the time delta between the start and end dates.
        start_datetime = self._get_start_date(f)
        end_datetime = self._get_end_date(f)
        mid_datetime = calculate_mid_time(start_datetime, end_datetime)
        logging.debug("Using {} as datetime for file {}".format(mid_datetime, f))
        time_coord = AuxCoord(mid_datetime, standard_name='time',
                              units=cis_standard_time_unit,
                              bounds=[start_datetime, end_datetime])

        var = sdata[variable]
        metadata = get_metadata(var)

        try:
            units = Unit(metadata.units)
        except ValueError:
            logging.warning("Unable to parse units '{}' in {} for {}.".format(metadata.units, f, variable))
            units = None

        cube = Cube(_get_MODIS_SDS_data(sdata[variable]),
                    dim_coords_and_dims=[(lon_coord, 1), (lat_coord, 0)],
                    aux_coords_and_dims=[(time_coord, None)],
                    var_name=metadata._name, long_name=metadata.long_name,
                    units=units)
        cube_list.append(cube)

    # Merge the cube list across the scalar time coordinates before returning a single cube.
    return cube_list.merge_cube()

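# A minimal sketch of the scalar time point used for the AuxCoord above. It assumes,
# as CIS does, a standard time unit of days since 1600-01-01, and reduces
# calculate_mid_time to plain arithmetic on the numeric times; the real helper may
# differ in detail.
from datetime import datetime
from cf_units import Unit

std_unit = Unit('days since 1600-01-01 00:00:00', calendar='gregorian')
start = std_unit.date2num(datetime(2008, 1, 1))
end = std_unit.date2num(datetime(2008, 2, 1))
mid = start + (end - start) / 2.0  # scalar point, bounded by [start, end] as above
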
def _generate_time_array(self, vdata):
    import cis.data_io.hdf_vd as hdf_vd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time

    Cloudsat_start_time = dt.datetime(1993, 1, 1, 0, 0, 0)

    arrays = []
    for i, j in zip(vdata['Profile_time'], vdata['TAI_start']):
        time = hdf_vd.get_data(i)
        start = hdf_vd.get_data(j)
        time += start

        # Do the conversion to standard time here before we expand the time array...
        time = convert_sec_since_to_std_time(time, Cloudsat_start_time)
        arrays.append(time)
    return utils.concatenate(arrays)

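# A runnable sketch of the arithmetic above, with synthetic values. The helper below
# stands in for cis.time_util.convert_sec_since_to_std_time and assumes the CIS
# standard time unit of days since 1600-01-01; the real routine may differ in detail.
import numpy as np
import datetime as dt

profile_time = np.array([0.0, 5.0, 10.0])  # seconds since TAI_start, one per profile
tai_start = 662731210.0                    # illustrative epoch, seconds since 1993-01-01

def sec_since_to_std_time(seconds, ref):
    # days since 1600-01-01 = days from 1600-01-01 to ref, plus the elapsed seconds in days
    delta = ref - dt.datetime(1600, 1, 1)
    return delta.total_seconds() / 86400.0 + seconds / 86400.0

std_time = sec_since_to_std_time(profile_time + tai_start, dt.datetime(1993, 1, 1))
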
def get_variable_names(self, filenames, data_type=None):
    try:
        from pyhdf.SD import SD
    except ImportError:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    variables = set([])

    # Determine the valid shape for variables
    sd = SD(filenames[0])
    datasets = sd.datasets()
    len_x = datasets['Latitude'][1][0]  # Assumes that latitude shape == longitude shape (it should)
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    len_y = alt_data.shape[0]
    valid_shape = (len_x, len_y)

    for filename in filenames:
        sd = SD(filename)
        for var_name, var_info in sd.datasets().items():
            if var_info[1] == valid_shape:
                variables.add(var_name)

    return variables

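# pyhdf's SD.datasets() maps each SDS name to a tuple whose second element is the
# shape, which is what the filter above compares against. A sketch with a mocked
# return value (the dataset names and shapes here are illustrative only):
mock_datasets = {
    'Latitude': (('fakeDim0',), (496,), 5, 0),
    'Total_Attenuated_Backscatter_532': (('fakeDim0', 'fakeDim1'), (496, 583), 5, 1),
    'Surface_Elevation': (('fakeDim0',), (496,), 5, 2),
}
valid_shape = (496, 583)
variables = {name for name, info in mock_datasets.items() if info[1] == valid_shape}
# variables == {'Total_Attenuated_Backscatter_532'}
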
def read_data(data_dict, data_type, missing_values=None):
    if data_type == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i, missing_values=missing_values) for i in data_dict])
    elif data_type == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i, missing_values=missing_values) for i in data_dict])
    else:
        raise ValueError("Invalid data-type: %s, HDF variables must be VD or SD only" % data_type)
    return out

def read_data(data_list, read_function):
    """
    Wrapper for calling an HDF reading function for each dataset, and then concatenating the result.

    :param list data_list: A list of data objects to read
    :param callable or str read_function: A function for reading the data, or 'SD' or 'VD' for default reading routines.
    :return: A single numpy array of concatenated data values.
    """
    if callable(read_function):
        out = utils.concatenate([read_function(i) for i in data_list])
    elif read_function == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i) for i in data_list])
    elif read_function == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i) for i in data_list])
    else:
        raise ValueError("Invalid read-function: {}, please supply a callable read "
                         "function, 'VD' or 'SD' only".format(read_function))
    return out

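# Usage sketch for the dispatching read_data above: a callable is applied to each
# dataset before concatenation, while 'SD'/'VD' select the default readers. The toy
# reader and the list of lists stand in for the HDF dataset objects CIS would pass:
import numpy as np

def toy_reader(ds):
    # in CIS this would be e.g. a _get_calipso_data method applied to an SDS instance
    return np.asarray(ds, dtype=float)

chunks = [[1.0, 2.0], [3.0, 4.0]]  # one entry per file/granule
combined = np.concatenate([toy_reader(c) for c in chunks])  # the callable branch, in essence
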
def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit

    logging.debug("Creating data object for variable " + variable)

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out size of data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_coord = AuxCoord(lat_data, standard_name='latitude')

    pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
    pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_coord = AuxCoord(lon_data, standard_name='longitude')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time',
                          units="seconds since 1993-01-01 00:00:00")
    time_coord.convert_units(cis_standard_time_unit)

    # retrieve data + its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    if variable in MIXED_RESOLUTION_VARIABLES:
        logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                        "documentation for more details".format(variable))
        data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
    else:
        data = hdf.read_data(var, self._get_calipso_data)

    cube = Cube(data,
                long_name=metadata.long_name,
                units=self.clean_units(metadata.units),
                dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                aux_coords_and_dims=[(lat_coord, (0,)), (lon_coord, (0,)), (pres_coord, (0, 1))])
    gd = GriddedData.make_from_cube(cube)
    return gd

def _get_cloudsat_sds_data(self, sds):
    """
    Reads raw data from an SD instance. Automatically applies the scaling
    factors and offsets to the data arrays often found in NASA HDF-EOS data (e.g. MODIS).

    :param sds: The specific sds instance to read
    :return: A numpy array containing the data, with missing values masked.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np
    from cis.data_io.hdf_vd import VDS, get_data
    from pyhdf.error import HDF4Error

    data = sds.get()
    attributes = sds.attributes()

    # First deal with the fill value
    fill_value = attributes.get('_FillValue', None)
    if fill_value is not None:
        data = create_masked_array_for_missing_data(data, fill_value)

    # TODO: This needs some explicit integration and unit tests
    # Then deal with missing values
    missop_fn = {'<': np.ma.masked_less,
                 '<=': np.ma.masked_less_equal,
                 '==': np.ma.masked_equal,
                 '=>': np.ma.masked_greater_equal,
                 '>': np.ma.masked_greater,
                 # TODO Note that this is wrong but seems to be what is meant, for
                 # Cloud_Effective_Radius at least...
                 'ge': np.ma.masked_equal,
                 'eq': np.ma.masked_equal}

    missing = attributes.get('missing', None)
    missop = attributes.get('missop', None)
    if missing is not None and missop is not None:
        try:
            logging.debug("Masking all values v {} {}".format(missop, missing))
            data = missop_fn[missop](data, missing)
        except KeyError:
            logging.warning("Unable to identify missop {}, unable to "
                            "mask missing values for {}.".format(missop, sds.info()[0]))

    # Now handle the valid range mask
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        # Assume it's the right data type already
        logging.debug("Masking all values outside the valid range [{}, {}].".format(*valid_range))
        data = np.ma.masked_outside(data, *valid_range)

    # Offsets and scaling - these come from Vdata variables with the appropriate suffixes
    try:
        offset = get_data(VDS(sds._filename, sds._variable + "_add_offset"))[0]
    except HDF4Error:
        logging.warning("Couldn't find offset variable " + sds._variable + "_add_offset")
        offset = 0

    try:
        scale_factor = get_data(VDS(sds._filename, sds._variable + "_scale_factor"))[0]
    except HDF4Error:
        logging.warning("Couldn't find scale factor variable " + sds._variable + "_scale_factor")
        scale_factor = 1

    data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

    return data

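# A sketch of the final scaling step, assuming the (value - offset) / scale_factor
# convention; _apply_scaling_factor_CLOUDSAT is not shown in this section, so treat
# this as an illustration rather than the authoritative CloudSat formula:
import numpy as np

def apply_cloudsat_scaling(raw, scale_factor, offset):
    # invert the packing applied when the product was written
    return (np.asarray(raw, dtype=float) - offset) / scale_factor

scaled = apply_cloudsat_scaling(np.array([100, 200, 300]), scale_factor=100.0, offset=0.0)
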
def test_that_can_get_data():
    vds_dict = hdf_vd.read(valid_hdf_vd_file, 'DEM_elevation')
    vds = vds_dict['DEM_elevation']
    data = hdf_vd.get_data(vds)
    eq_(37081, len(data))

def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))

    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # work out size of data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

    lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units=cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        if data.ndim == 2:
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            cubes.append(new_cube)
        elif data.ndim == 3:
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            # Then add the (extended) pressure coord so that it is explicitly a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
            cubes.append(new_cube)
        else:
            raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd

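# A self-contained sketch of the scalar-time promotion used above: build a cube with
# a scalar time coordinate, promote it with iris.util.new_axis, and the resulting
# length-one time dimension is what lets the per-file cubes concatenate along time.
# The coordinate values are illustrative.
import numpy as np
from iris.cube import Cube
from iris.coords import DimCoord, AuxCoord
from iris.util import new_axis

lat = DimCoord(np.array([-10.0, 10.0]), standard_name='latitude', units='degrees_north')
lon = DimCoord(np.array([0.0, 120.0, 240.0]), standard_name='longitude', units='degrees_east')
time = AuxCoord(15.5, standard_name='time', units='days since 2000-01-01')

cube = Cube(np.zeros((2, 3)),
            dim_coords_and_dims=[(lat, 0), (lon, 1)],
            aux_coords_and_dims=[(time, None)])
promoted = new_axis(cube, 'time')  # shape (1, 2, 3); time now spans the new leading dimension
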
def test_that_can_get_data():
    vds_dict = hdf_vd.read(escape_colons(valid_hdf_vd_file), 'DEM_elevation')
    vds = vds_dict['DEM_elevation']
    data = hdf_vd.get_data(vds)
    eq_(37081, len(data))

def _create_coord_list(self, filenames, index_offset=0):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # work out size of data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_data *= 1000.0  # Convert to m
    len_x = alt_data.shape[0]

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
    len_y = lat_data.shape[0]

    new_shape = (len_x, len_y)

    # altitude
    alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0)
    alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape)
    alt_coord = Coord(alt_data, alt_metadata)

    # pressure
    if self.include_pressure:
        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
        # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
        if str(pres_metadata.units) == "hPA":
            pres_metadata.units = "hPa"
        pres_metadata.shape = new_shape
        pres_coord = Coord(pres_data, pres_metadata, 'P')

    # latitude
    lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_metadata.shape = new_shape
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)
    lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_metadata.shape = new_shape
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time, x
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1)
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time',
                                           shape=time_data.shape, units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)
    coords.append(alt_coord)
    if self.include_pressure and (pres_data.shape == alt_data.shape):
        # For MODIS L1 this may not be true, so we skip the air pressure reading. If required for
        # MODIS L1 then some kind of interpolation of the air pressure would be needed, as it is on
        # a different (smaller) grid than the Lidar_Data_Altitudes.
        coords.append(pres_coord)

    return coords

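# A numpy sketch of cis.utils.expand_1d_to_2d_array as used above, assuming axis=0
# tiles the 1D array as repeated rows and axis=1 as repeated columns:
import numpy as np

def expand_1d_to_2d(a, length, axis=0):
    a = np.asarray(a)
    tiled = np.tile(a, (length, 1))         # shape (length, a.size), rows duplicated
    return tiled if axis == 0 else tiled.T  # axis=1: shape (a.size, length), columns duplicated

alt = np.array([1.0, 2.0, 3.0])                  # e.g. len_x altitude levels
lat = np.array([10.0, 20.0])                     # e.g. len_y profiles
alt_2d = expand_1d_to_2d(alt, lat.size, axis=0)  # shape (2, 3)
lat_2d = expand_1d_to_2d(lat, alt.size, axis=1)  # shape (2, 3)
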