def _create_coord_list(self):
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    from cis.time_util import cis_standard_time_unit as cstu
    # This implements a lot of what is necessary, but isn't in CIS style
    from orbit import ATSR

    lat_data = []
    lon_data = []
    time_data = []
    for fname in self.filenames:
        prod = ATSR(fname)
        lat_data.append(prod.lat)
        lon_data.append(prod.lon)
        time_data.append(prod.get_time())

    # TODO: Properly define metadata
    lat_meta = Metadata(standard_name="latitude", units="degrees")
    lon_meta = Metadata(standard_name="longitude", units="degrees")
    time_meta = Metadata(standard_name="time", units=cstu)

    lat = Coord(concatenate(lat_data), lat_meta, "Y")
    lat.update_shape()
    lat.update_range()
    lon = Coord(concatenate(lon_data), lon_meta, "X")
    lon.update_shape()
    lon.update_range()
    time = Coord(concatenate(time_data), time_meta, "T")
    time.update_shape()
    time.update_range()

    return CoordList([lat, lon, time])

def _create_bounded_coord_list(self):
    from acp_utils import rolling_window
    from orbit import ATSR

    coords = self._create_coord_list()

    lat_bounds = []
    lon_bounds = []
    time_bounds = []
    for fname in self.filenames:
        prod = ATSR(fname)

        lat_c = rolling_window(prod.lat_corner, (2, 2))
        lat_bounds.append(lat_c.reshape(prod.shape + (4,)))
        lon_c = rolling_window(prod.lon_corner, (2, 2))
        lon_bounds.append(lon_c.reshape(prod.shape + (4,)))

        t = prod.get_time()
        # Bound each scan line by its own time and that of the next line,
        # extrapolating for the final line
        b = np.stack([t, np.roll(t, -1, axis=0)], axis=2)
        b[-1, :, 1] = 2 * t[-1, :] - t[-2, :]
        time_bounds.append(b)

    coords[0].bounds = concatenate(lat_bounds).reshape(coords[0].data.shape + (4,))
    # Ensure corners are given in sequential order
    coords[0].bounds[..., 2:4] = coords[0].bounds[..., [3, 2]]
    coords[1].bounds = concatenate(lon_bounds).reshape(coords[1].data.shape + (4,))
    coords[1].bounds[..., 2:4] = coords[1].bounds[..., [3, 2]]
    coords[2].bounds = concatenate(time_bounds)

    return coords

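# A minimal, self-contained sketch of the time-bounds trick above, assuming
# get_time() returns a 2D array whose rows share one scan-line time (made-up
# numbers). Each cell is bounded below by its own scan time and above by the
# next line's, with the final line extrapolated from the last two.
def _example_time_bounds():
    import numpy as np
    t = np.repeat(np.array([[0.0], [1.0], [2.0]]), 4, axis=1)  # 3 scans x 4 pixels
    b = np.stack([t, np.roll(t, -1, axis=0)], axis=2)  # (3, 4, 2) lower/upper pairs
    b[-1, :, 1] = 2 * t[-1, :] - t[-2, :]  # final line: 2 * 2.0 - 1.0 = 3.0
    return b  # e.g. b[0, 0] == [0.0, 1.0]
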
def read_data(data_dict, data_type, missing_values=None):
    if data_type == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i, missing_values=missing_values) for i in data_dict])
    elif data_type == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i, missing_values=missing_values) for i in data_dict])
    else:
        raise ValueError("Invalid data-type: %s, HDF variables must be VD or SD only" % data_type)
    return out

def SolZen(self):
    from orbit import ATSR
    from cis.data_io.ungridded_data import Metadata

    tmp = []
    for f in self.filenames:
        orbit = ATSR(f)

        # Get tie point grid
        sph = orbit._prod.get_sph()
        tie_field = sph.get_field("VIEW_ANGLE_TIE_POINTS")
        tie_pts = tie_field.get_elems()

        # Get tie point values
        scan_y = orbit._read_field("NADIR_VIEW_SOLAR_ANGLES_ADS", "img_scan_y")
        tie_solelv = orbit._read_field("NADIR_VIEW_SOLAR_ANGLES_ADS", "tie_pt_sol_elev")

        # Swath grid
        x = np.arange(512) - 255.5
        y = orbit._read_field("11500_12500_NM_NADIR_TOA_MDS", "img_scan_y")
        y[:-1] += 0.5 * (y[1:] - y[:-1])
        y[-1] += 0.5 * (y[-1] - y[-2])

        solelv = orbit.extrap_atsr_angle(tie_pts, scan_y, x, y, tie_solelv)
        tmp.append(90. - solelv)

    return concatenate(tmp), Metadata(standard_name="solar_zenith_angle", units="degrees")

def _create_time_coord(self, timestamp, time_variable_name, data_variables,
                       coord_axis='T', standard_name='time'):
    """
    Create a time coordinate, taking into account the fact that each file may have a different timestamp.
    :param timestamp: Timestamp or list of timestamps, one for each file
    :param time_variable_name: Name of the time variable
    :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
    :param coord_axis: Axis, default 'T'
    :param standard_name: Coord standard name, default 'time'
    :return: Coordinate
    """
    from iris.coords import AuxCoord
    from six.moves import zip_longest
    from cis.time_util import convert_time_using_time_stamp_info_to_std_time as convert, cis_standard_time_unit
    from cis.utils import concatenate

    timestamps = listify(timestamp)
    time_variables = data_variables[time_variable_name]
    time_data = []

    # Create a coordinate for each separate file to account for differing timestamps
    for file_time_var, timestamp in zip_longest(time_variables, timestamps):
        metadata = get_metadata(file_time_var)
        if timestamp is not None:
            time_d = convert(file_time_var[:], metadata.units, timestamp)
        else:
            time_d = metadata.units.convert(file_time_var[:], cis_standard_time_unit)
        time_data.append(time_d)

    return AuxCoord(concatenate(time_data), standard_name=standard_name, units=cis_standard_time_unit)

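# Why zip_longest is used above: a hedged, runnable illustration. With fewer
# timestamps than time variables, the missing entries are padded with None and
# so fall through to the plain unit-conversion branch. Names here are invented.
def _example_zip_longest():
    from itertools import zip_longest  # six.moves.zip_longest resolves to this on Python 3
    time_vars = ['t_file1', 't_file2', 't_file3']
    timestamps = ['2008-01-01']
    return list(zip_longest(time_vars, timestamps))
    # [('t_file1', '2008-01-01'), ('t_file2', None), ('t_file3', None)]
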
def create_coords(self, filenames, variable=None):
    """
    Reads the coordinates, and data if required, from the files
    :param filenames: List of filenames to read coordinates from
    :param variable: load a variable for the data
    :return: Coordinates
    """
    from iris.cube import Cube
    from iris.coords import DimCoord
    from cis.data_io.netcdf import read
    from cis.utils import concatenate

    data_variables, variable_selector = self._load_data(filenames, variable)

    aux_coords = self._create_coordinates_list(data_variables, variable_selector)
    dim_coords = [(DimCoord(np.arange(len(aux_coords[0].points)), var_name='obs'), (0,))]

    if variable is None:
        raise ValueError("Must specify variable")

    aux_coord_name = variable_selector.find_auxiliary_coordinate(variable)
    if aux_coord_name is not None:
        # We assume that the auxiliary coordinate is the same shape across files
        v = read(filenames[0], [aux_coord_name])[aux_coord_name]
        aux_meta = get_metadata(v)
        # We have to assume the shape here...
        dim_coords.append((DimCoord(v[:], var_name=aux_coord_name, units=aux_meta.units,
                                    long_name=aux_meta.long_name), (1,)))

    cube_meta = get_metadata(data_variables[variable][0])
    return Cube(concatenate([d[:] for d in data_variables[variable]]),
                units=cube_meta.units, var_name=variable, long_name=cube_meta.long_name,
                dim_coords_and_dims=dim_coords,
                aux_coords_and_dims=[(c, (0,)) for c in aux_coords])

def read_data(data_list, read_function):
    """
    Wrapper for calling an HDF reading function for each dataset, and then concatenating the result.

    :param list data_list: A list of data objects to read
    :param callable or str read_function: A function for reading the data, or 'SD' or 'VD' for default reading routines.
    :return: A single numpy array of concatenated data values.
    """
    if callable(read_function):
        out = utils.concatenate([read_function(i) for i in data_list])
    elif read_function == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i) for i in data_list])
    elif read_function == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i) for i in data_list])
    else:
        raise ValueError("Invalid read-function: {}, please supply a callable read "
                         "function, 'VD' or 'SD' only".format(read_function))
    return out

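# A hedged usage sketch for read_data above: the callable branch lets a custom
# reader stand in for the default hdf_vd/hdf_sd routines. 'datasets' is assumed
# to be a list of pyhdf SDS objects, whose .get() method reads the raw array.
def _example_read_data(datasets):
    import numpy as np

    def masked_reader(sds):
        return np.ma.masked_invalid(sds.get())

    return read_data(datasets, masked_reader)
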
def _create_coord_list(self, filenames):
    import numpy as np
    from cis.time_util import calculate_mid_time, cis_standard_time_unit

    variables = ["XDim", "YDim"]
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    lat = sdata["YDim"]
    lat_metadata = hdf.read_metadata(lat, "SD")
    lon = sdata["XDim"]
    lon_metadata = hdf.read_metadata(lon, "SD")

    # Expand the lat and lon data arrays so that they have the same shape
    lat_data = utils.expand_1d_to_2d_array(hdf.read_data(lat, "SD"),
                                           lon_metadata.shape, axis=1)  # expand latitude column wise
    lon_data = utils.expand_1d_to_2d_array(hdf.read_data(lon, "SD"),
                                           lat_metadata.shape, axis=0)  # expand longitude row wise
    lat_metadata.shape = lat_data.shape
    lon_metadata.shape = lon_data.shape

    # Make sure the standard_name ("latitude"/"longitude") is displayed instead of "YDim"/"XDim"
    lat_metadata.standard_name = "latitude"
    lat_metadata._name = ""
    lon_metadata.standard_name = "longitude"
    lon_metadata._name = ""

    # Create arrays for the time coordinate using the midpoint between each file's start and end dates
    time_data_array = []
    for filename in filenames:
        mid_datetime = calculate_mid_time(self._get_start_date(filename), self._get_end_date(filename))
        logging.debug("Using " + str(mid_datetime) + " as datetime for file " + str(filename))
        # Only use part of the full lat shape as it has already been concatenated
        time_data = np.empty((lat_metadata.shape[0] // len(filenames), lat_metadata.shape[1]),
                             dtype="float64")
        time_data.fill(mid_datetime)
        time_data_array.append(time_data)
    time_data = utils.concatenate(time_data_array)

    time_metadata = Metadata(name="DateTime", standard_name="time", shape=time_data.shape,
                             units=str(cis_standard_time_unit),
                             calendar=cis_standard_time_unit.calendar)

    coords = CoordList()
    coords.append(Coord(lon_data, lon_metadata, "X"))
    coords.append(Coord(lat_data, lat_metadata, "Y"))
    coords.append(Coord(time_data, time_metadata, "T"))

    return coords

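# A toy illustration of the lat/lon expansion above, assuming CIS's
# expand_1d_to_2d_array repeats a vector along the given axis: the length-N
# latitude is copied across columns and the length-M longitude across rows,
# yielding matching 2D grids for the Coord constructors.
def _example_expand_1d_to_2d():
    import numpy as np
    lat = np.array([10., 20., 30.])  # YDim, length 3
    lon = np.array([100., 110.])     # XDim, length 2
    lat_2d = np.repeat(lat[:, np.newaxis], len(lon), axis=1)  # expand column-wise
    lon_2d = np.repeat(lon[np.newaxis, :], len(lat), axis=0)  # expand row-wise
    return lat_2d, lon_2d  # both (3, 2)
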
def from_many_coordinates(cls, coords):
    """
    Create a single coordinate object from the concatenation of all of the coordinate objects
    in the input list, updating the shape as appropriate

    :param coords: A list of coordinate objects to be combined
    :return: A single :class:`Coord` object
    """
    from cis.utils import concatenate

    data = concatenate([ug.data for ug in coords])
    metadata = coords[0].metadata  # Use the first file as a master for the metadata...
    metadata.shape = data.shape  # But update the shape
    return cls(data, metadata, coords[0].axis)

def hdf_read(filenames, variable, start=None, count=None, stride=None):
    """Returns variable, concatenated over a sequence of files."""
    from cis.data_io.hdf import read
    from cis.data_io.hdf_sd import get_metadata
    from cis.utils import concatenate

    sdata, _ = read(filenames, variable)
    var = sdata[variable]

    data = concatenate([_get_MODIS_SDS_data(i, start, count, stride) for i in var])
    metadata = get_metadata(var[0])

    return data, metadata

def ncdf_read(filenames, variable, start=None, count=None, stride=None):
    """Returns variable, concatenated over a sequence of files."""
    from cis.data_io.netcdf import read, get_metadata
    from cis.utils import concatenate, listify

    data = []
    for f in listify(filenames):
        sdata = read(f, variable)
        var = sdata[variable]
        data.append(_tidy_ncdf_data(var, start, count, stride))
    metadata = get_metadata(var)

    return concatenate(data), metadata

def create_coords(self, filenames, variable=None):
    """
    Override the default read-in to also read in CCN quality flag data and apply the appropriate mask.
    We have to do this before creating the UngriddedData object so that the missing coords don't get
    fixed first
    """
    from cis.data_io.netcdf import read_many_files_individually, get_metadata
    from cis.utils import apply_mask_to_numpy_array, concatenate
    from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData

    data_variables, variable_selector = self._load_data(filenames, variable)

    dim_coords = self._create_coordinates_list(data_variables, variable_selector)

    if variable is None:
        return UngriddedCoordinates(dim_coords)
    else:
        aux_coord_name = variable_selector.find_auxiliary_coordinate(variable)
        if aux_coord_name is not None:
            all_coords = self._add_aux_coordinate(dim_coords, filenames[0], aux_coord_name,
                                                  dim_coords.get_coord(standard_name='time').data.size)
        else:
            all_coords = dim_coords

        var_data = data_variables[variable]
        if variable and variable.startswith('CCN_COL'):
            # Work out the associated flag variable name for this column
            ccn_flag_var = "COL{}_FLAG".format(variable[-1])
            # Read in the flags
            flags = concatenate([get_data(v) for v in
                                 read_many_files_individually(filenames, ccn_flag_var)[ccn_flag_var]])
            # 0 and 1 are both OK
            mask = flags > 1
            # If a variable was supplied then coords must be an ungridded data object; apply the mask to it
            var_data = apply_mask_to_numpy_array(concatenate([get_data(v) for v in var_data]), mask)

        return UngriddedData(var_data, get_metadata(data_variables[variable][0]), all_coords)

def _generate_time_array(self, vdata):
    import cis.data_io.hdf_vd as hdf_vd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time

    Cloudsat_start_time = dt.datetime(1993, 1, 1, 0, 0, 0)

    arrays = []
    for i, j in zip(vdata['Profile_time'], vdata['TAI_start']):
        time = hdf_vd.get_data(i)
        start = hdf_vd.get_data(j)
        time += start

        # Do the conversion to standard time here before we expand the time array...
        time = convert_sec_since_to_std_time(time, Cloudsat_start_time)
        arrays.append(time)

    return utils.concatenate(arrays)

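# A worked toy version of the CloudSat time handling above (made-up numbers),
# assuming convert_sec_since_to_std_time maps seconds since a reference epoch
# to CIS standard time, i.e. days since 1600-01-01 as the unit strings in this
# module state: offset the profile times by TAI_start, then rescale and shift.
def _example_cloudsat_time():
    import numpy as np
    import datetime as dt
    profile_time = np.array([0.16, 0.32, 0.48])  # seconds into the granule
    tai_start = 693921600.0                      # hypothetical seconds since 1993-01-01
    seconds = profile_time + tai_start
    epoch_offset = (dt.datetime(1993, 1, 1) - dt.datetime(1600, 1, 1)).days
    return seconds / 86400.0 + epoch_offset      # days since 1600-01-01
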
def _create_coord(self, coord_axis, data_variable_name, data_variables, standard_name):
    """
    Create a coordinate for the co-ordinate list
    :param coord_axis: axis of the coordinate in the coords
    :param data_variable_name: the name of the variable in the data
    :param data_variables: the data variables
    :param standard_name: the standard name it should have
    :return: a coords object
    """
    from iris.coords import AuxCoord
    from cis.utils import concatenate

    data = concatenate([d[:] for d in data_variables[data_variable_name]])
    m = get_metadata(data_variables[data_variable_name][0])

    return AuxCoord(data, units=m.units, standard_name=standard_name)

def _create_coord_list(self, filenames, variable=None):
    import datetime as dt
    from cis.time_util import cis_standard_time_unit
    from cis.utils import concatenate
    from geotiepoints import modis5kmto1km

    variables = ['Latitude', 'Longitude', 'View_time']
    logging.info("Listing coordinates: " + str(variables))

    sdata, vdata = hdf.read(filenames, variables)

    apply_interpolation = False
    if variable is not None:
        scale = self.__get_data_scale(filenames[0], variable)
        apply_interpolation = scale == "1km"

    lat_data = hdf.read_data(sdata['Latitude'], _get_MODIS_SDS_data)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")

    lon_data = hdf.read_data(sdata['Longitude'], _get_MODIS_SDS_data)
    lon_metadata = hdf.read_metadata(sdata['Longitude'], "SD")

    if apply_interpolation:
        lon_data, lat_data = modis5kmto1km(lon_data, lat_data)

    lat_coord = Coord(lat_data, lat_metadata, 'Y')
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    time = sdata['View_time']
    time_metadata = hdf.read_metadata(time, "SD")
    # Ensure the standard name is set
    time_metadata.standard_name = 'time'
    time_metadata.units = cis_standard_time_unit

    t_arrays = []
    for f, d in zip(filenames, time):
        time_start = self._get_start_date(f)
        t_data = _get_MODIS_SDS_data(d) / 24.0  # Convert hours since to days since
        t_offset = time_start - dt.datetime(1600, 1, 1)  # Convert to CIS time
        t_arrays.append(t_data + t_offset.days)

    time_coord = Coord(concatenate(t_arrays), time_metadata, "T")

    return CoordList([lat_coord, lon_coord, time_coord])

def load_multiple_aeronet(filenames, variables=None):
    from cis.utils import add_element_to_list_in_dict, concatenate

    adata = {}

    for filename in filenames:
        logging.debug("reading file: " + filename)

        # Read all variables for this file into a dictionary:
        # a_dict, key: variable name, value: list of masked arrays
        a_dict = load_aeronet(filename, variables)
        for var in list(a_dict.keys()):
            add_element_to_list_in_dict(adata, var, a_dict[var])

    for var in list(adata.keys()):
        adata[var] = concatenate(adata[var])

    return adata

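# A self-contained sketch of the accumulate-then-concatenate pattern above,
# with plain dict operations standing in for CIS's add_element_to_list_in_dict
# and concatenate (the variable name and values are invented).
def _example_accumulate_concatenate():
    import numpy.ma as ma
    per_file = [{'AOT_500': ma.array([0.1, 0.2])}, {'AOT_500': ma.array([0.3])}]
    adata = {}
    for a_dict in per_file:
        for var, value in a_dict.items():
            adata.setdefault(var, []).append(value)
    return {var: ma.concatenate(values) for var, values in adata.items()}
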
def _create_bounded_coord_list(self):
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    from cis.time_util import cis_standard_time_unit as cstu
    # These implement a lot of what is necessary, but aren't in CIS style
    from acp_utils import rolling_window
    from orbit import MODIS

    lat_data = []
    lat_bounds = []
    lon_data = []
    lon_bounds = []
    time_data = []
    time_bounds = []
    for fname in self.filenames:
        prod = MODIS(fname)

        lat_data.append(prod.lat)
        lon_data.append(prod.lon)
        lat_c = rolling_window(prod.lat_corner, (2, 2))
        lat_bounds.append(lat_c.reshape(prod.shape + (4,)))
        lon_c = rolling_window(prod.lon_corner, (2, 2))
        lon_bounds.append(lon_c.reshape(prod.shape + (4,)))

        t = prod.get_time()
        time_data.append(t)
        # Bound each scan line by its own time and that of the next line,
        # extrapolating for the final line
        b = np.stack([t, np.roll(t, -1, axis=0)], axis=2)
        b[-1, :, 1] = 2 * t[-1, :] - t[-2, :]
        time_bounds.append(b)

    # TODO: Properly define metadata
    lat_meta = Metadata(standard_name="latitude", units="degrees")
    lon_meta = Metadata(standard_name="longitude", units="degrees")
    time_meta = Metadata(standard_name="time", units=cstu)

    lat = Coord(concatenate(lat_data), lat_meta, "Y")
    lat.update_shape()
    lat.update_range()
    lat.bounds = concatenate(lat_bounds).reshape(lat.shape + (4,))
    # Ensure corners are given in sequential order
    lat.bounds[..., 2:4] = lat.bounds[..., [3, 2]]
    lon = Coord(concatenate(lon_data), lon_meta, "X")
    lon.update_shape()
    lon.update_range()
    lon.bounds = concatenate(lon_bounds).reshape(lon.shape + (4,))
    lon.bounds[..., 2:4] = lon.bounds[..., [3, 2]]
    time = Coord(concatenate(time_data), time_meta, "T")
    time.update_shape()
    time.update_range()
    time.bounds = concatenate(time_bounds)

    return CoordList([lat, lon, time])

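# acp_utils.rolling_window isn't shown in this module; numpy's
# sliding_window_view (numpy >= 1.20) is assumed to be an equivalent for the
# (2, 2) case used above: every 2x2 block of cell-corner coordinates becomes
# the four bounds of one cell, reordered so the corners run sequentially.
def _example_corner_bounds():
    import numpy as np
    from numpy.lib.stride_tricks import sliding_window_view
    corners = np.arange(12.).reshape(4, 3)          # (ny+1, nx+1) cell corners
    windows = sliding_window_view(corners, (2, 2))  # (3, 2, 2, 2) read-only view
    bounds = windows.reshape(3 * 2, 4).copy()       # one row of 4 corners per cell
    bounds[:, 2:4] = bounds[:, [3, 2]]              # reorder to sequential corners
    return bounds  # shape (6, 4)
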
def create_coords(self, filenames, variable=None):
    from cis.data_io.ungridded_data import Metadata
    from numpy import genfromtxt, NaN
    from cis.exceptions import InvalidVariableError
    from cis.time_util import convert_datetime_to_std_time
    import dateutil.parser as du

    array_list = []

    for filename in filenames:
        try:
            array_list.append(genfromtxt(filename, dtype="f8,f8,f8,O,f8",
                                         names=['latitude', 'longitude', 'altitude', 'time', 'value'],
                                         delimiter=',', missing_values='', usemask=True,
                                         invalid_raise=True, converters={"time": du.parse}))
        except Exception:
            raise IOError('Unable to read file ' + filename)

    data_array = utils.concatenate(array_list)
    n_elements = len(data_array['latitude'])

    coords = CoordList()
    coords.append(Coord(data_array["latitude"],
                        Metadata(standard_name="latitude", shape=(n_elements,), units="degrees_north")))
    coords.append(Coord(data_array["longitude"],
                        Metadata(standard_name="longitude", shape=(n_elements,), units="degrees_east")))
    coords.append(Coord(data_array["altitude"],
                        Metadata(standard_name="altitude", shape=(n_elements,), units="meters")))

    time_arr = convert_datetime_to_std_time(data_array["time"])
    time = Coord(time_arr,
                 Metadata(standard_name="time", shape=(n_elements,),
                          units="days since 1600-01-01 00:00:00"))
    coords.append(time)

    if variable:
        try:
            data = UngriddedData(data_array['value'],
                                 Metadata(name="value", shape=(n_elements,), units="unknown",
                                          missing_value=NaN), coords)
        except Exception:
            raise InvalidVariableError("Value column does not exist in file " + str(filenames))
        return data
    else:
        return UngriddedCoordinates(coords)

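# A runnable miniature of the genfromtxt call above, reading two invented CSV
# rows with a dateutil-parsed time column. encoding="utf-8" is added so the
# converter receives str rather than bytes on Python 3.
def _example_genfromtxt():
    from io import StringIO
    from numpy import genfromtxt
    import dateutil.parser as du
    csv = StringIO("51.5,-0.1,25.0,2010-05-01T12:00:00,3.2\n"
                   "48.9,2.4,35.0,2010-05-01T13:00:00,2.7")
    return genfromtxt(csv, dtype="f8,f8,f8,O,f8",
                      names=['latitude', 'longitude', 'altitude', 'time', 'value'],
                      delimiter=',', missing_values='', usemask=True,
                      converters={"time": du.parse}, encoding="utf-8")
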
def _create_coord(self, coord_axis, data_variable_name, data_variables, standard_name):
    """
    Create a coordinate for the co-ordinate list
    :param coord_axis: axis of the coordinate in the coords
    :param data_variable_name: the name of the variable in the data
    :param data_variables: the data variables
    :param standard_name: the standard name it should have
    :return: a coords object
    """
    from cis.data_io.netcdf import get_metadata
    from iris.coords import AuxCoord
    from cis.utils import concatenate
    from cf_units import Unit
    import logging

    data = concatenate([get_data(d) for d in data_variables[data_variable_name]])

    m = get_metadata(data_variables[data_variable_name][0])
    m._name = m._name.lower()
    m.standard_name = standard_name

    if standard_name == 'air_pressure':
        if not isinstance(m.units, Unit):
            if ',' in m.units:
                # Try splitting any commas out
                m.units = m.units.split(',')[0]
            if ' ' in m.units:
                # Try splitting any spaces out
                m.units = m.units.split()[0]
            if str(m.units) == 'mb' or str(m.units) == 'Mb':
                # Try converting to standard nomenclature
                m.units = 'mbar'
            if str(m.units) == 'hpa':
                m.units = 'hPa'

        logging.info("Parsed air pressure units {old}".format(old=m.units))
        logging.info('Converting to hPa')
        if not isinstance(m.units, str):
            data = m.units.convert(data, 'hPa')
            m.units = 'hPa'

    return AuxCoord(data, units=m.units, standard_name=standard_name)

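# A hedged sketch of the unit clean-up above using cf_units directly: a raw
# string like 'mb' is normalised to 'mbar', then the data are converted to hPa
# (numerically a no-op, since 1 mbar == 1 hPa, but it makes the metadata
# consistent across files).
def _example_pressure_units():
    import numpy as np
    from cf_units import Unit
    data = np.array([1013.25, 900.0])
    units = Unit('mbar')               # after normalising the raw 'mb' string
    return units.convert(data, 'hPa')  # array([1013.25, 900.])
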
def load_multiple_hysplit(fnames, variables=None):
    from cis.utils import add_element_to_list_in_dict, concatenate

    hdata = {}

    for filename in fnames:
        logging.debug("reading file: " + filename)

        # Read in all trajectories
        # h_dict, key: trajectory starting lat/lon/alt, value: dict containing trajectory data
        h_dict = load_hysplit(filename, variables)
        for traj in list(h_dict.keys()):
            if traj in hdata:
                for var in list(h_dict[traj].keys()):
                    # TODO error appending masked array! add these manually
                    add_element_to_list_in_dict(hdata[traj], var, h_dict[traj][var])
                for var in list(hdata[traj].keys()):
                    hdata[traj][var] = concatenate(hdata[traj][var])
            else:
                hdata[traj] = h_dict[traj]

    return hdata

def create_data_object(self, filenames, variable):
    logging.debug("Creating data object for variable " + variable)

    variables = [("ER2_IMU/Longitude", "x"), ("ER2_IMU/Latitude", "y"), ("ER2_IMU/gps_time", "t"),
                 ("State/Pressure", "p"), ("DataProducts/Altitude", "z"),
                 ("header/date", ""), (variable, '')]

    logging.info("Listing coordinates: " + str(variables))

    var_data = read_many_files_individually(filenames, [v[0] for v in variables])

    date_times = []
    for times, date in zip(var_data['ER2_IMU/gps_time'], var_data['header/date']):
        # Date is stored as an array (of length 92??) of floats with format: yyyymmdd
        date_str = str(int(date[0]))
        t_unit = Unit('hours since {}-{}-{} 00:00:00'.format(date_str[0:4], date_str[4:6],
                                                             date_str[6:8]))
        date_times.append(t_unit.convert(get_data(times), cis_standard_time_unit))

    # time_data = utils.concatenate([get_data(i) for i in var_data['ER2_IMU/gps_time']])
    # date_str = str(int(var_data['header/date'][0][0]))

    # Flatten the data by taking the 0th column of the transpose
    time_coord = DimCoord(utils.concatenate(date_times).T[0], standard_name='time',
                          units=cis_standard_time_unit)

    # TODO This won't work for multiple files since the altitude bins are different for each flight...
    alt_data = utils.concatenate([get_data(i) for i in var_data["DataProducts/Altitude"]])
    alt_coord = DimCoord(alt_data[0], standard_name='altitude', units='m')

    pres_data = utils.concatenate([get_data(i) for i in var_data["State/Pressure"]])
    pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='atm')
    # Fix the air-pressure units
    pres_coord.convert_units('hPa')

    lat_data = utils.concatenate([get_data(i) for i in var_data['ER2_IMU/Latitude']])
    lat_coord = AuxCoord(lat_data.T[0], standard_name='latitude')

    lon_data = utils.concatenate([get_data(i) for i in var_data['ER2_IMU/Longitude']])
    lon_coord = AuxCoord(lon_data.T[0], standard_name='longitude')

    data = utils.concatenate([get_data(i) for i in var_data[variable]])
    metadata = get_metadata(var_data[variable][0])

    cube = Cube(np.ma.masked_invalid(data),
                long_name=metadata.misc['Description'],
                units=self.clean_units(metadata.units),
                dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                aux_coords_and_dims=[(lat_coord, (0,)), (lon_coord, (0,)), (pres_coord, (0, 1))])
    gd = GriddedData.make_from_cube(cube)
    return gd

def load_hysplit(fname, variables=None):
    import numpy as np
    from numpy import ma
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit
    from cis.utils import add_element_to_list_in_dict, concatenate

    std_day = cis_standard_time_unit.num2date(0)

    fmetadata = get_file_metadata(fname)

    try:
        rawd = np.genfromtxt(fname,
                             skip_header=fmetadata['data_start'],
                             dtype=np.float64,
                             usemask=True)
    except (StopIteration, IndexError) as e:
        raise IOError(e)

    data_dict = {}

    # Get data for one trajectory at a time
    for t in range(1, fmetadata['n_trajectories'] + 1):
        tdata_dict = {}
        trajectory_data = rawd[rawd[:, hysplit_default_var.index('TRAJECTORY_NO')] == t]

        # Convert time from each row to standard time
        for trajectory in trajectory_data:
            day = datetime(int(trajectory[2]) + 2000,  # TODO Dan: is it okay to assume this?
                           int(trajectory[3]),
                           int(trajectory[4]))
            sday = float((day - std_day).days)
            td = timedelta(hours=int(trajectory[5]), minutes=int(trajectory[6]))
            fractional_day = td.total_seconds() / (24.0 * 60.0 * 60.0)
            dt = sday + fractional_day
            add_element_to_list_in_dict(tdata_dict, 'DATETIMES', [dt])

        # Clean up data
        tdata_dict['DATETIMES'] = ma.array(concatenate(tdata_dict['DATETIMES']))  # TODO mask is only one value

        # Add other default data
        tdata_dict['LAT'] = trajectory_data[:, hysplit_default_var.index('LAT')]
        tdata_dict['LON'] = trajectory_data[:, hysplit_default_var.index('LON')]
        tdata_dict['ALT'] = trajectory_data[:, hysplit_default_var.index('ALT')]
        tdata_dict['PRESSURE'] = trajectory_data[:, hysplit_default_var.index('PRESSURE')]
        # TODO any other default variables to add?

        if variables is not None:
            # If variables are set, fetch only those variables
            for key in variables:
                try:
                    tdata_dict[key] = trajectory_data[:, fmetadata['labels'].index(key)]
                except ValueError:
                    raise InvalidVariableError(key + " does not exist in " + fname)
        else:
            # Else, return all variables in the file
            for label in fmetadata['custom_labels']:
                try:
                    tdata_dict[label] = trajectory_data[:, fmetadata['labels'].index(label)]
                except ValueError:
                    raise InvalidVariableError(label + " does not exist in " + fname)

        # TODO trajectory keys are tuples of lat/lon/alt
        tkey = fmetadata['trajectories'][t]
        data_dict[tkey] = tdata_dict

    return data_dict

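# A worked check of the HYSPLIT time arithmetic above: 2019-06-01 06:30 becomes
# the whole-day offset from the CIS standard epoch (1600-01-01, per the unit
# strings in this module) plus 6.5/24 of a day.
def _example_hysplit_time():
    from datetime import datetime, timedelta
    std_day = datetime(1600, 1, 1)
    day = datetime(2019, 6, 1)
    sday = float((day - std_day).days)
    td = timedelta(hours=6, minutes=30)
    return sday + td.total_seconds() / (24.0 * 60.0 * 60.0)  # sday + 0.2708...
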
def _create_bounded_coord_list(self):
    """Adaptation of the CIS MODIS_L2 class version that isn't lazy."""
    from cis.time_util import convert_sec_since_to_std_time
    from pyhdf.error import HDF4Error
    from pyhdf.SD import SD

    def calc_latlon_bounds(base_data, nrows=10):
        """Interpolate 10-line MODIS scans to return pixel edges."""
        from acp_utils import rolling_window
        from itertools import product
        from scipy.interpolate import RegularGridInterpolator

        # Coordinates in file give cell centres
        nx, ny = base_data.shape
        assert nx % nrows == 0
        x0 = np.arange(0.5, nrows, 1)
        y0 = np.arange(0.5, ny, 1)

        # Aerosol pixels skip the outermost columns
        ystart = (ny % nrows) // 2
        x1 = np.array([0, nrows])
        y1 = np.arange(ystart, ny + 1, nrows)

        # Iterate over 10-line chunks
        bounds = []
        for chunk in np.split(base_data, nx // nrows, 0):
            if (chunk.max() - chunk.min()) > 180.:
                # Sodding dateline
                chunk[chunk < 0.] += 360.

            interp = RegularGridInterpolator((x0, y0), chunk, "linear", False, None)
            tmp = interp(list(product(x1, y1))).reshape(2, len(y1))
            corners = rolling_window(tmp, (2, 2))
            bounds.append(corners.reshape(ny // nrows, 4))

        # Ensure corners are given in sequential order
        bounds = np.ma.masked_invalid(bounds)
        bounds[..., 2:4] = bounds[..., [3, 2]]

        return bounds

    lon_bounds = []
    lat_bounds = []
    for f in self._mod03_filenames:
        try:
            file_object = SD(f)
            lon_1kmdata = _get_hdf_data(file_object, "Longitude")
            lat_1kmdata = _get_hdf_data(file_object, "Latitude")
            file_object.end()
        except HDF4Error:
            raise IOError("Corrupted file " + f)

        tmp_bounds = calc_latlon_bounds(lon_1kmdata)
        tmp_bounds[tmp_bounds > 180.] -= 360.
        tmp_bounds[tmp_bounds <= -180.] += 360.
        lon_bounds.append(tmp_bounds)

        tmp_bounds = calc_latlon_bounds(lat_1kmdata)
        tmp_bounds[tmp_bounds >= 90.] = np.ma.masked
        tmp_bounds[tmp_bounds <= -90.] = np.ma.masked
        lat_bounds.append(tmp_bounds)

    coords = self._create_coord_list()
    coords[0].bounds = concatenate(lat_bounds)
    coords[1].bounds = concatenate(lon_bounds)

    unique_times = np.unique(coords[2].data.compressed())
    try:
        deltas = unique_times[1:] - unique_times[:-1]
        delta_map = {t: d / 2 for t, d in zip(unique_times, deltas)}
        delta_map[unique_times[-1]] = deltas[-1] / 2
        time_bounds = np.ma.array([[t - delta_map[t], t + delta_map[t]]
                                   if t is not np.ma.masked else [np.ma.masked, np.ma.masked]
                                   for t in coords[2].data.ravel()]).reshape(coords[2].data.shape + (2,))
    except IndexError:
        # File too small to have multiple time stamps; guess +-2.5 min
        time_bounds = np.stack([coords[2].data - 0.00174, coords[2].data + 0.00174], axis=2)
    coords[2].bounds = convert_sec_since_to_std_time(time_bounds, MODIS_REFERENCE_TIME)

    return coords

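# A reduced sketch of the interpolation step inside calc_latlon_bounds: cell-
# centre values on a regular grid are evaluated at cell-edge positions with
# scipy's RegularGridInterpolator, where bounds_error=False and fill_value=None
# allow linear extrapolation beyond the outermost centres. Numbers are made up.
def _example_edge_interpolation():
    import numpy as np
    from itertools import product
    from scipy.interpolate import RegularGridInterpolator
    centres = np.arange(20.).reshape(4, 5)  # 4 lines x 5 columns of cell centres
    x0 = np.arange(0.5, 4, 1)               # centre positions along each axis
    y0 = np.arange(0.5, 5, 1)
    interp = RegularGridInterpolator((x0, y0), centres, "linear", False, None)
    edges = interp(list(product([0, 4], [0, 5])))  # the four outer corners
    return edges.reshape(2, 2)
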
def _calculate_grid_time(self, var_name, lat_data, lon_data):
    """Approximate time from a pair of corresponding MOD03 files"""
    from osgeo.gdal import Open
    from scipy.interpolate import griddata

    def fetch_MOD03_coordinates(start_time, aqua=False):
        import os.path
        from glob import glob
        from pyhdf.SD import SD
        from pyhdf.error import HDF4Error

        # Locate MOD03 file
        search_path = start_time.strftime(os.path.join(self.mod03_path,
                                                       "MOD03.A%Y%j.%H%M.061*hdf"))
        if aqua:
            # NOTE: System dependent approximation
            search_path = search_path.replace("MOD", "MYD")
        try:
            mod03_file = glob(search_path)[0]
        except IndexError:
            raise FileNotFoundError("MOD03: " + search_path)

        # Read space-time grid from that file
        try:
            file_object = SD(mod03_file)
            dims = file_object.datasets()["Longitude"][1]
            count = dims[0] // 10, dims[1] // 10
            mod_lon = _get_hdf_data(file_object, "Longitude", start=(0, 2), count=count, stride=(10, 10))
            mod_lat = _get_hdf_data(file_object, "Latitude", start=(0, 2), count=count, stride=(10, 10))
            mod_time = _get_hdf_data(file_object, "EV start time", count=count[:1])
            file_object.end()
        except HDF4Error:
            raise IOError("Corrupted file: " + mod03_file)

        return mod_lon, mod_lat, mod_time

    time_data = []
    variable = Open(var_name)
    meta = variable.GetMetadata_Dict()
    for timestamp in meta["Orbit_time_stamp"].split():
        # Parse time stamp
        start_time = dt.datetime.strptime(timestamp[:-1], "%Y%j%H%M")

        try:
            # Interpolate time from MOD03 files
            mod_lon0, mod_lat0, mod_time0 = fetch_MOD03_coordinates(
                start_time - dt.timedelta(seconds=300), timestamp[-1] == "A")
            mod_lon1, mod_lat1, mod_time1 = fetch_MOD03_coordinates(start_time, timestamp[-1] == "A")

            mod_lon = concatenate([mod_lon0, mod_lon1])
            mod_lat = concatenate([mod_lat0, mod_lat1])
            mod_time = concatenate([mod_time0, mod_time1])
            if (mod_lon.max() - mod_lon.min()) > 180.:
                # Sodding dateline
                mod_lon[mod_lon < 0.] += 360.

            # Interpolate that grid onto the sinusoidal projection
            time = griddata((mod_lon.ravel(), mod_lat.ravel()),
                            np.tile(mod_time, mod_lon.shape[1]),
                            (lon_data, lat_data), method="nearest")
        except (FileNotFoundError, TypeError):
            # Just use the orbit start time
            seconds = start_time - MODIS_REFERENCE_TIME
            time = np.full(lat_data.shape, seconds.total_seconds())

        time_data.append(time)

    return concatenate(time_data)

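# A toy version of the nearest-neighbour regridding at the end of
# _calculate_grid_time: scattered (lon, lat) samples carrying times are mapped
# onto target coordinates with scipy.interpolate.griddata. Values are invented.
def _example_nearest_time():
    import numpy as np
    from scipy.interpolate import griddata
    src_lon = np.array([0.0, 1.0, 0.0, 1.0])
    src_lat = np.array([0.0, 0.0, 1.0, 1.0])
    src_time = np.array([10.0, 10.0, 20.0, 20.0])  # seconds since some reference
    tgt_lon = np.array([0.2, 0.9])
    tgt_lat = np.array([0.1, 0.8])
    return griddata((src_lon, src_lat), src_time, (tgt_lon, tgt_lat),
                    method="nearest")  # array([10., 20.])
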