def read_many_files_individually(filenames, usr_variables): """ Read multiple Variables from many NetCDF files manually - i.e. not with MFDataset as this doesn't always work, in particular for NetCDF4 files. :param filenames: A list of NetCDF filenames to read, or a string with wildcards. :param usr_variables: A list of variable (dataset) names to read from the files. The names must appear exactly as in in the NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the form ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``. :return: A dictionary of lists of variable instances constructed from all of the input files with the fully qualified variable name as the key """ from cis.utils import add_element_to_list_in_dict usr_variables = listify(usr_variables) var_data = {} for filename in filenames: var_dict = read(filename, usr_variables) for var in list(var_dict.keys()): add_element_to_list_in_dict(var_data, var, var_dict[var]) return var_data
def _create_one_dimensional_coord_list(self, filenames, index_offset=1): """ Create a set of coordinates appropriate for a ond-dimensional (column integrated) variable :param filenames: :param int index_offset: For 5km products this will choose the coordinates which represent the start (0), middle (1) and end (2) of the 15 shots making up each column retrieval. :return: """ from pyhdf.error import HDF4Error from cis.data_io import hdf_sd import datetime as dt from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit variables = ['Latitude', 'Longitude', "Profile_Time"] logging.info("Listing coordinates: " + str(variables)) # reading data from files sdata = {} for filename in filenames: try: sds_dict = hdf_sd.read(filename, variables) except HDF4Error as e: raise IOError(str(e)) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) # latitude lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset] lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD") lat_coord = Coord(lat_data, lat_metadata, 'Y') # longitude lon = sdata['Longitude'] lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset] lon_metadata = hdf.read_metadata(lon, "SD") lon_coord = Coord(lon_data, lon_metadata, 'X') # profile time, x time = sdata['Profile_Time'] time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset] time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0)) time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape, units=cis_standard_time_unit), "T") # create the object containing all coordinates coords = CoordList() coords.append(lat_coord) coords.append(lon_coord) coords.append(time_coord) return coords
def load_multiple_aeronet(filenames, variables=None): from cis.utils import add_element_to_list_in_dict, concatenate adata = {} for filename in filenames: logging.debug("reading file: " + filename) # reading in all variables into a dictionary: # a_dict, key: variable name, value: list of masked arrays a_dict = load_aeronet(filename, variables) for var in list(a_dict.keys()): add_element_to_list_in_dict(adata, var, a_dict[var]) for var in list(adata.keys()): adata[var] = concatenate(adata[var]) return adata
def read(filenames, variables): sdata = {} vdata = {} for filename in filenames: logging.debug("reading file: " + filename) # reading in all variables into a 2 dictionaries: # sdata, key: variable name, value: list of sds # vdata, key: variable name, value: list of vds sds_dict, vds_dict = _read_hdf4(filename, variables) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) for var in list(vds_dict.keys()): utils.add_element_to_list_in_dict(vdata, var, vds_dict[var]) return sdata, vdata
def load_multiple_hysplit(fnames, variables=None): from cis.utils import add_element_to_list_in_dict, concatenate hdata = {} for filename in fnames: logging.debug("reading file: " + filename) # read in all trajectories # h_dict, key: trajectory starting lat/lon/altm value: dict containing trajectory data h_dict = load_hysplit(filename, variables) for traj in list(h_dict.keys()): if (traj in hdata): for var in list(h_dict[traj].keys()): # TODO error appending masked array! add these manually add_element_to_list_in_dict(hdata[traj], var, h_dict[traj][var]) for var in list(hdata[traj].keys()): hdata[traj][var] = concatenate(hdata[traj][var]) else: hdata[traj] = h_dict[traj] return hdata
def load_hysplit(fname, variables=None): import numpy as np from numpy import ma from datetime import datetime, timedelta from cis.time_util import cis_standard_time_unit from cis.utils import add_element_to_list_in_dict, concatenate std_day = cis_standard_time_unit.num2date(0) fmetadata = get_file_metadata(fname) try: rawd = np.genfromtxt(fname, skip_header=fmetadata['data_start'], dtype=np.float64, usemask=True) except (StopIteration, IndexError) as e: raise IOError(e) data_dict = {} # Get data for one trajectory at a time for t in range(1, fmetadata['n_trajectories']+1): tdata_dict = {} trajectory_data = rawd[rawd[:,hysplit_default_var.index('TRAJECTORY_NO')] == t] # Convert time from each row to standard time for trajectory in trajectory_data: day = datetime((int(trajectory[2]) + 2000), # TODO Dan: is it okay to assume this? int(trajectory[3]), int(trajectory[4])) sday = float((day - std_day).days) td = timedelta(hours=int(trajectory[5]), minutes=int(trajectory[6])) fractional_day = td.total_seconds()/(24.0*60.0*60.0) dt = sday + fractional_day add_element_to_list_in_dict(tdata_dict, 'DATETIMES', [dt]) # Clean up data tdata_dict['DATETIMES'] = ma.array(concatenate(tdata_dict['DATETIMES'])) # TODO mask is only one value # Add other default data tdata_dict['LAT'] = trajectory_data[:,hysplit_default_var.index('LAT')] tdata_dict['LON'] = trajectory_data[:,hysplit_default_var.index('LON')] tdata_dict['ALT'] = trajectory_data[:,hysplit_default_var.index('ALT')] tdata_dict['PRESSURE'] = trajectory_data[:, hysplit_default_var.index('PRESSURE')] # TODO any other default variables to add? # If variables set, fetch only set variables if variables is not None: for key in variables: try: tdata_dict[key] = trajectory_data[:,fmetadata['labels'].index(key)] except ValueError: raise InvalidVariableError(key + "does not exist in " + fname) # Else, return all variables in file else: for label in fmetadata['custom_labels']: try: tdata_dict[label] = trajectory_data[:,fmetadata['labels'].index(label)] except ValueError: raise InvalidVariableError(key + " does not exist in " + fname) # TODO trajectory keys are tuples of lat/long/alt tkey = fmetadata['trajectories'][t] data_dict[tkey] = tdata_dict return data_dict
def _create_coord_list(self, filenames, index_offset=0): import logging from cis.data_io import hdf as hdf from cis.data_io.Coord import Coord, CoordList from cis.data_io.ungridded_data import Metadata import cis.utils as utils from cis.data_io.hdf_vd import VDS from pyhdf.error import HDF4Error from cis.data_io import hdf_sd import datetime as dt from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"] logging.info("Listing coordinates: " + str(variables)) # reading data from files sdata = {} for filename in filenames: try: sds_dict = hdf_sd.read(filename, variables) except HDF4Error as e: raise IOError(str(e)) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) alt_name = "altitude" logging.info("Additional coordinates: '" + alt_name + "'") # work out size of data arrays # the coordinate variables will be reshaped to match that. # NOTE: This assumes that all Caliop_L1 files have the same altitudes. # If this is not the case, then the following line will need to be changed # to concatenate the data from all the files and not just arbitrarily pick # the altitudes from the first file. alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True) alt_data *= 1000.0 # Convert to m len_x = alt_data.shape[0] lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data) len_y = lat_data.shape[0] new_shape = (len_x, len_y) # altitude alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0) alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape) alt_coord = Coord(alt_data, alt_metadata) # pressure if self.include_pressure: pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data) pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD") # Fix badly formatted units which aren't CF compliant and will break if they are aggregated if str(pres_metadata.units) == "hPA": pres_metadata.units = "hPa" pres_metadata.shape = new_shape pres_coord = Coord(pres_data, pres_metadata, 'P') # latitude lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1) lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD") lat_metadata.shape = new_shape lat_coord = Coord(lat_data, lat_metadata, 'Y') # longitude lon = sdata['Longitude'] lon_data = hdf.read_data(lon, self._get_calipso_data) lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1) lon_metadata = hdf.read_metadata(lon, "SD") lon_metadata.shape = new_shape lon_coord = Coord(lon_data, lon_metadata, 'X') # profile time, x time = sdata['Profile_Time'] time_data = hdf.read_data(time, self._get_calipso_data) time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0)) time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1) time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape, units=cis_standard_time_unit), "T") # create the object containing all coordinates coords = CoordList() coords.append(lat_coord) coords.append(lon_coord) coords.append(time_coord) coords.append(alt_coord) if self.include_pressure and (pres_data.shape == alt_data.shape): # For MODIS L1 this may is not be true, so skips the air pressure reading. If required for MODIS L1 then # some kind of interpolation of the air pressure would be required, as it is on a different (smaller) grid # than for the Lidar_Data_Altitudes. coords.append(pres_coord) return coords
def create_data_object(self, filenames, variable, index_offset=1): from cis.data_io.hdf_vd import get_data from cis.data_io.hdf_vd import VDS from pyhdf.error import HDF4Error from cis.data_io import hdf_sd from iris.coords import DimCoord, AuxCoord from iris.cube import Cube from cis.data_io.gridded_data import GriddedData from cis.time_util import cis_standard_time_unit logging.debug("Creating data object for variable " + variable) variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"] logging.info("Listing coordinates: " + str(variables)) variables.append(variable) # reading data from files sdata = {} for filename in filenames: try: sds_dict = hdf_sd.read(filename, variables) except HDF4Error as e: raise IOError(str(e)) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) alt_name = "altitude" logging.info("Additional coordinates: '" + alt_name + "'") # work out size of data arrays # the coordinate variables will be reshaped to match that. # NOTE: This assumes that all Caliop_L1 files have the same altitudes. # If this is not the case, then the following line will need to be changed # to concatenate the data from all the files and not just arbitrarily pick # the altitudes from the first file. alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True) alt_coord = DimCoord(alt_data, standard_name='altitude', units='km') alt_coord.convert_units('m') lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset] lat_coord = AuxCoord(lat_data, standard_name='latitude') pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data) pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa') # longitude lon = sdata['Longitude'] lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset] lon_coord = AuxCoord(lon_data, standard_name='longitude') # profile time, x time = sdata['Profile_Time'] time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset] time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time', units="seconds since 1993-01-01 00:00:00") time_coord.convert_units(cis_standard_time_unit) # retrieve data + its metadata var = sdata[variable] metadata = hdf.read_metadata(var, "SD") if variable in MIXED_RESOLUTION_VARIABLES: logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO " "documentation for more details".format(variable)) data = hdf.read_data(var, self._get_mixed_resolution_calipso_data) else: data = hdf.read_data(var, self._get_calipso_data) cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units), dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)], aux_coords_and_dims=[(lat_coord, (0,)), (lon_coord, (0,)), (pres_coord, (0, 1))]) gd = GriddedData.make_from_cube(cube) return gd
def create_data_object(self, filenames, variable, index_offset=1): from cis.data_io.hdf_vd import get_data from cis.data_io.hdf_vd import VDS from pyhdf.error import HDF4Error from cis.data_io import hdf_sd from iris.coords import DimCoord, AuxCoord from iris.cube import Cube, CubeList from cis.data_io.gridded_data import GriddedData from cis.time_util import cis_standard_time_unit from datetime import datetime from iris.util import new_axis import numpy as np logging.debug("Creating data object for variable " + variable) variables = ["Pressure_Mean"] logging.info("Listing coordinates: " + str(variables)) variables.append(variable) # reading data from files sdata = {} for filename in filenames: try: sds_dict = hdf_sd.read(filename, variables) except HDF4Error as e: raise IOError(str(e)) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) # work out size of data arrays # the coordinate variables will be reshaped to match that. # NOTE: This assumes that all Caliop_L1 files have the same altitudes. # If this is not the case, then the following line will need to be changed # to concatenate the data from all the files and not just arbitrarily pick # the altitudes from the first file. alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :] alt_coord = DimCoord(alt_data, standard_name='altitude', units='km') alt_coord.convert_units('m') lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :] lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north') lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :] lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east') cubes = CubeList() for f in filenames: t = get_data(VDS(f, "Nominal_Year_Month"), True)[0] time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15)) time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time', units=cis_standard_time_unit) # retrieve data + its metadata var = sdata[variable] metadata = hdf.read_metadata(var, "SD") data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable)) pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean')) pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa') if data.ndim == 2: # pres_coord = new_axis() cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units), dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)], aux_coords_and_dims=[(time_coord, ())]) # Promote the time scalar coord to a length one dimension new_cube = new_axis(cube, 'time') cubes.append(new_cube) elif data.ndim == 3: # pres_coord = new_axis() cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units), dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)], aux_coords_and_dims=[(time_coord, ())]) # Promote the time scalar coord to a length one dimension new_cube = new_axis(cube, 'time') # Then add the (extended) pressure coord so that it is explicitly a function of time new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3)) cubes.append(new_cube) else: raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim)) # Concatenate the cubes from each file into a single GriddedData object gd = GriddedData.make_from_cube(cubes.concatenate_cube()) return gd
def create_data_object(self, filenames, variable, index_offset=1): from cis.data_io.hdf_vd import get_data from cis.data_io.hdf_vd import VDS from pyhdf.error import HDF4Error from cis.data_io import hdf_sd from iris.coords import DimCoord, AuxCoord from iris.cube import Cube from cis.data_io.gridded_data import GriddedData from cis.time_util import cis_standard_time_unit logging.debug("Creating data object for variable " + variable) variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"] logging.info("Listing coordinates: " + str(variables)) variables.append(variable) # reading data from files sdata = {} for filename in filenames: try: sds_dict = hdf_sd.read(filename, variables) except HDF4Error as e: raise IOError(str(e)) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) alt_name = "altitude" logging.info("Additional coordinates: '" + alt_name + "'") # work out size of data arrays # the coordinate variables will be reshaped to match that. # NOTE: This assumes that all Caliop_L1 files have the same altitudes. # If this is not the case, then the following line will need to be changed # to concatenate the data from all the files and not just arbitrarily pick # the altitudes from the first file. alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True) alt_coord = DimCoord(alt_data, standard_name='altitude', units='km') alt_coord.convert_units('m') lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset] lat_coord = AuxCoord(lat_data, standard_name='latitude') pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data) pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa') # longitude lon = sdata['Longitude'] lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset] lon_coord = AuxCoord(lon_data, standard_name='longitude') # profile time, x time = sdata['Profile_Time'] time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset] time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time', units="seconds since 1993-01-01 00:00:00") time_coord.convert_units(cis_standard_time_unit) # retrieve data + its metadata var = sdata[variable] metadata = hdf.read_metadata(var, "SD") if variable in MIXED_RESOLUTION_VARIABLES: logging.warning( "Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO " "documentation for more details".format(variable)) data = hdf.read_data(var, self._get_mixed_resolution_calipso_data) else: data = hdf.read_data(var, self._get_calipso_data) cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units), dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)], aux_coords_and_dims=[(lat_coord, (0, )), (lon_coord, (0, )), (pres_coord, (0, 1))]) gd = GriddedData.make_from_cube(cube) return gd
def create_data_object(self, filenames, variable, index_offset=1): from cis.data_io.hdf_vd import get_data from cis.data_io.hdf_vd import VDS from pyhdf.error import HDF4Error from cis.data_io import hdf_sd from iris.coords import DimCoord, AuxCoord from iris.cube import Cube, CubeList from cis.data_io.gridded_data import GriddedData from cis.time_util import cis_standard_time_unit from datetime import datetime from iris.util import new_axis import numpy as np logging.debug("Creating data object for variable " + variable) variables = ["Pressure_Mean"] logging.info("Listing coordinates: " + str(variables)) variables.append(variable) # reading data from files sdata = {} for filename in filenames: try: sds_dict = hdf_sd.read(filename, variables) except HDF4Error as e: raise IOError(str(e)) for var in list(sds_dict.keys()): utils.add_element_to_list_in_dict(sdata, var, sds_dict[var]) # work out size of data arrays # the coordinate variables will be reshaped to match that. # NOTE: This assumes that all Caliop_L1 files have the same altitudes. # If this is not the case, then the following line will need to be changed # to concatenate the data from all the files and not just arbitrarily pick # the altitudes from the first file. alt_data = self._get_calipso_data( hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :] alt_coord = DimCoord(alt_data, standard_name='altitude', units='km') alt_coord.convert_units('m') lat_data = self._get_calipso_data( hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :] lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north') lon_data = self._get_calipso_data( hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :] lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east') cubes = CubeList() for f in filenames: t = get_data(VDS(f, "Nominal_Year_Month"), True)[0] time_data = cis_standard_time_unit.date2num( datetime(int(t[0:4]), int(t[4:6]), 15)) time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time', units=cis_standard_time_unit) # retrieve data + its metadata var = sdata[variable] metadata = hdf.read_metadata(var, "SD") data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable)) pres_data = self._get_calipso_data( hdf_sd.HDF_SDS(f, 'Pressure_Mean')) pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa') if data.ndim == 2: # pres_coord = new_axis() cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units), dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)], aux_coords_and_dims=[(time_coord, ())]) # Promote the time scalar coord to a length one dimension new_cube = new_axis(cube, 'time') cubes.append(new_cube) elif data.ndim == 3: # pres_coord = new_axis() cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units), dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)], aux_coords_and_dims=[(time_coord, ())]) # Promote the time scalar coord to a length one dimension new_cube = new_axis(cube, 'time') # Then add the (extended) pressure coord so that it is explicitly a function of time new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3)) cubes.append(new_cube) else: raise ValueError( "Unexpected number of dimensions for CALIOP data: {}". format(data.ndim)) # Concatenate the cubes from each file into a single GriddedData object gd = GriddedData.make_from_cube(cubes.concatenate_cube()) return gd