def _create_bounded_coord_list(self):
    from cis.time_util import convert_sec_since_to_std_time
    from os.path import basename

    coords = self._create_coord_list()

    lat_bounds_all = []
    lon_bounds_all = []
    for fname in self.filenames:
        if self.grid_path:
            granule = basename(fname).split(".")[2]
            lat_bounds, lon_bounds = self._read_grid_edges(granule)
        else:
            var_name = self.gdal_variable_name(fname, "Optical_Depth_055")
            lat_bounds, lon_bounds = self._calculate_grid_edges(var_name)

        # Workaround for files containing only one day
        sh = (-1,) + lat_bounds.shape[:-1]
        keep = np.logical_not(self._read_qcmask(fname)).reshape(sh)
        for keep_slice in keep:
            lat_bounds_all.extend(lat_bounds[keep_slice])
            lon_bounds_all.extend(lon_bounds[keep_slice])

    coords[0].bounds = np.ma.array(lat_bounds_all)
    coords[1].bounds = np.ma.array(lon_bounds_all)

    # As the time stamp is approximate (multiple scans can fall in a single
    # sinusoidal cell), guess the bounds are +/- 2 scans (each being 5s).
    coords[2].bounds = convert_sec_since_to_std_time(
        np.stack([coords[2].data - 10, coords[2].data + 10], axis=-1),
        MODIS_REFERENCE_TIME)

    return coords
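# A minimal standalone sketch (toy values, not part of the reader) of the
# +/- 2-scan time-bounds construction above: each MODIS scan takes about 5 s,
# so the half-width is 2 scans * 5 s = 10 s, stacked into an (..., 2) array
# of (lower, upper) pairs before conversion to the standard calendar.
import numpy as np

time_sec = np.array([100.0, 115.0, 130.0])                # seconds since reference
bounds_sec = np.stack([time_sec - 10, time_sec + 10], axis=-1)
print(bounds_sec.shape)                                    # (3, 2): one pair per point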
def _create_one_dimensional_coord_list(self, filenames, index_offset=1):
    """
    Create a set of coordinates appropriate for a one-dimensional (column integrated) variable

    :param filenames:
    :param int index_offset: For 5km products this will choose the coordinates which represent the start (0),
        middle (1) and end (2) of the 15 shots making up each column retrieval.
    :return:
    """
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # latitude
    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time',
                                           shape=time_data.shape, units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)

    return coords
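# A minimal sketch (toy array) of the [:, index_offset] selection used above:
# 5 km CALIOP column products record three positions per 15-shot column, and
# index_offset picks the start (0), middle (1) or end (2) of each column.
import numpy as np

lat_three_per_column = np.array([[10.0, 10.1, 10.2],
                                 [10.3, 10.4, 10.5]])      # (n_columns, 3)
middle_lats = lat_three_per_column[:, 1]                   # index_offset=1
print(middle_lats)                                         # [10.1 10.4]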
def test_that_can_convert_julian_tai_to_datetime_obj(self):
    import numpy as np
    sec = 1.0 / (24.0 * 60.0 * 60.0)
    days_since_standard_epoch = 143541.0  # Almost, but not quite 365.2425*393.0, not sure why...

    a = np.arange(6).reshape(2, 3)

    b = convert_sec_since_to_std_time(a, dt.datetime(1993, 1, 1))

    eq_(a.shape, b.shape)
    assert_almost_equal(b[0][0], days_since_standard_epoch)
    assert_almost_equal(b[0][1], days_since_standard_epoch + 1 * sec)
    assert_almost_equal(b[0][2], days_since_standard_epoch + 2 * sec)
    assert_almost_equal(b[1][0], days_since_standard_epoch + 3 * sec)
    assert_almost_equal(b[1][1], days_since_standard_epoch + 4 * sec)
    assert_almost_equal(b[1][2], days_since_standard_epoch + 5 * sec)
def test_that_can_convert_masked_tai_to_datetime_obj(self):
    import numpy.ma as ma
    sec = 1.0 / (24.0 * 60.0 * 60.0)
    days_since_standard_epoch = 143541.0  # Almost, but not quite 365.2425*393.0, not sure why...

    a = ma.array([0, 1, 2, 3, 4, 5], mask=[False, False, True, False, False, False]).reshape(2, 3)

    b = convert_sec_since_to_std_time(a, dt.datetime(1993, 1, 1))

    eq_(a.shape, b.shape)
    assert_almost_equal(b[0][0], days_since_standard_epoch)
    assert_almost_equal(b[0][1], days_since_standard_epoch + 1 * sec)
    assert_almost_equal(b.filled()[0][2], b.fill_value)
    assert_almost_equal(b[1][0], days_since_standard_epoch + 3 * sec)
    assert_almost_equal(b[1][1], days_since_standard_epoch + 4 * sec)
    assert_almost_equal(b[1][2], days_since_standard_epoch + 5 * sec)
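# A minimal sketch of the conversion the two tests above exercise, assuming the
# CIS standard epoch is 1600-01-01 (consistent with the expected value:
# 1993-01-01 falls exactly 143541 days after 1600-01-01). This is only a
# sketch; the real convert_sec_since_to_std_time also handles fill values.
import datetime as dt
import numpy as np

SECONDS_PER_DAY = 24.0 * 60.0 * 60.0

def sec_since_to_std_days(sec_since, ref, std_epoch=dt.datetime(1600, 1, 1)):
    """Convert seconds since `ref` to fractional days since the standard epoch."""
    offset_days = (ref - std_epoch).days
    # np.asanyarray preserves masked arrays, matching the masked test above.
    return offset_days + np.asanyarray(sec_since) / SECONDS_PER_DAY

print(sec_since_to_std_days(0.0, dt.datetime(1993, 1, 1)))  # 143541.0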
def _generate_time_array(self, vdata):
    import cis.data_io.hdf_vd as hdf_vd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time

    Cloudsat_start_time = dt.datetime(1993, 1, 1, 0, 0, 0)

    arrays = []
    for i, j in zip(vdata['Profile_time'], vdata['TAI_start']):
        time = hdf_vd.get_data(i)
        start = hdf_vd.get_data(j)
        time += start

        # Do the conversion to standard time here before we expand the time array...
        time = convert_sec_since_to_std_time(time, Cloudsat_start_time)
        arrays.append(time)

    return utils.concatenate(arrays)
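# A worked sketch (hypothetical numbers) of the offset arithmetic above:
# CloudSat stores Profile_time relative to the granule's TAI_start, so adding
# the scalar start yields absolute seconds since the 1993-01-01 epoch, which
# convert_sec_since_to_std_time then maps onto the standard calendar.
import numpy as np

profile_time = np.array([0.16, 0.32, 0.48])   # seconds since granule start
tai_start = 694224000.0                        # hypothetical seconds since 1993-01-01
absolute_time = profile_time + tai_start       # what `time += start` computes above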
def _create_coord_list(self, filenames, index_offset=0):
    import logging
    from cis.data_io import hdf as hdf
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import Metadata
    import cis.utils as utils
    from cis.data_io.hdf_vd import VDS, get_data
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    import datetime as dt
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

    variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
    logging.info("Listing coordinates: " + str(variables))

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    alt_name = "altitude"
    logging.info("Additional coordinates: '" + alt_name + "'")

    # Work out the size of the data arrays; the coordinate variables will be
    # reshaped to match it.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    #       If this is not the case, then the following line will need to be
    #       changed to concatenate the data from all the files and not just
    #       arbitrarily pick the altitudes from the first file.
    alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
    alt_data *= 1000.0  # Convert to m
    len_x = alt_data.shape[0]

    lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
    len_y = lat_data.shape[0]

    new_shape = (len_x, len_y)

    # altitude
    alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0)
    alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape)
    alt_coord = Coord(alt_data, alt_metadata)

    # pressure
    if self.include_pressure:
        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
        # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
        if str(pres_metadata.units) == "hPA":
            pres_metadata.units = "hPa"
        pres_metadata.shape = new_shape
        pres_coord = Coord(pres_data, pres_metadata, 'P')

    # latitude
    lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1)
    lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
    lat_metadata.shape = new_shape
    lat_coord = Coord(lat_data, lat_metadata, 'Y')

    # longitude
    lon = sdata['Longitude']
    lon_data = hdf.read_data(lon, self._get_calipso_data)
    lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1)
    lon_metadata = hdf.read_metadata(lon, "SD")
    lon_metadata.shape = new_shape
    lon_coord = Coord(lon_data, lon_metadata, 'X')

    # profile time
    time = sdata['Profile_Time']
    time_data = hdf.read_data(time, self._get_calipso_data)
    time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
    time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1)
    time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time',
                                           shape=time_data.shape, units=cis_standard_time_unit), "T")

    # create the object containing all coordinates
    coords = CoordList()
    coords.append(lat_coord)
    coords.append(lon_coord)
    coords.append(time_coord)
    coords.append(alt_coord)
    if self.include_pressure and (pres_data.shape == alt_data.shape):
        # For MODIS L1 this may not be true, in which case the air pressure reading is skipped.
        # If air pressure is required for MODIS L1 then some kind of interpolation would be needed,
        # as it is on a different (smaller) grid than the Lidar_Data_Altitudes.
        coords.append(pres_coord)

    return coords
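# A minimal sketch (toy data) of the 1-D -> 2-D duplication that the
# expand_1d_to_2d_array calls above perform, so that every coordinate ends up
# on one common grid shape. The tiling semantics shown here via numpy
# broadcasting are an assumption about the helper, not its actual source.
import numpy as np

a = np.array([1, 2, 3, 4])
rows = np.broadcast_to(a, (3, a.size))                  # duplicate along axis 0 -> (3, 4)
cols = np.broadcast_to(a[:, np.newaxis], (a.size, 3))   # duplicate along axis 1 -> (4, 3)
print(rows.shape, cols.shape)                           # (3, 4) (4, 3)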
def convert_TAI_time_to_std_time(self, ref):
    from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit
    self._data = convert_sec_since_to_std_time(self.data, ref)
    self.units = cis_standard_time_unit
def _create_bounded_coord_list(self):
    """Adaptation of the CIS MODIS_L2 class version that isn't lazy."""
    from cis.time_util import convert_sec_since_to_std_time
    from pyhdf.error import HDF4Error
    from pyhdf.SD import SD

    def calc_latlon_bounds(base_data, nrows=10):
        """Interpolate 10-line MODIS scans to return pixel edges."""
        from acp_utils import rolling_window
        from itertools import product
        from scipy.interpolate import RegularGridInterpolator

        # Coordinates in file give cell centres
        nx, ny = base_data.shape
        assert nx % nrows == 0
        x0 = np.arange(0.5, nrows, 1)
        y0 = np.arange(0.5, ny, 1)

        # Aerosol pixels skip the outermost columns
        ystart = (ny % nrows) // 2
        x1 = np.array([0, nrows])
        y1 = np.arange(ystart, ny + 1, nrows)

        # Iterate over 10-line chunks
        bounds = []
        for chunk in np.split(base_data, nx // nrows, 0):
            if (chunk.max() - chunk.min()) > 180.:
                # Sodding dateline
                chunk[chunk < 0.] += 360.
            interp = RegularGridInterpolator((x0, y0), chunk, "linear", False, None)
            tmp = interp(list(product(x1, y1))).reshape(2, len(y1))

            corners = rolling_window(tmp, (2, 2))
            bounds.append(corners.reshape(ny // nrows, 4))

        # Ensure corners are given in sequential order
        bounds = np.ma.masked_invalid(bounds)
        bounds[..., 2:4] = bounds[..., [3, 2]]

        return bounds

    lon_bounds = []
    lat_bounds = []
    for f in self._mod03_filenames:
        try:
            file_object = SD(f)
            lon_1kmdata = _get_hdf_data(file_object, "Longitude")
            lat_1kmdata = _get_hdf_data(file_object, "Latitude")
            file_object.end()
        except HDF4Error:
            raise IOError("Corrupted file " + f)

        tmp_bounds = calc_latlon_bounds(lon_1kmdata)
        tmp_bounds[tmp_bounds > 180.] -= 360.
        tmp_bounds[tmp_bounds <= -180.] += 360.
        lon_bounds.append(tmp_bounds)

        tmp_bounds = calc_latlon_bounds(lat_1kmdata)
        tmp_bounds[tmp_bounds >= 90.] = np.ma.masked
        tmp_bounds[tmp_bounds <= -90.] = np.ma.masked
        lat_bounds.append(tmp_bounds)

    coords = self._create_coord_list()
    coords[0].bounds = concatenate(lat_bounds)
    coords[1].bounds = concatenate(lon_bounds)

    unique_times = np.unique(coords[2].data.compressed())
    try:
        deltas = unique_times[1:] - unique_times[:-1]
        delta_map = {t: d / 2 for t, d in zip(unique_times, deltas)}
        delta_map[unique_times[-1]] = deltas[-1] / 2
        time_bounds = np.ma.array([
            [t - delta_map[t], t + delta_map[t]] if t is not np.ma.masked
            else [np.ma.masked, np.ma.masked]
            for t in coords[2].data.ravel()
        ]).reshape(coords[2].data.shape + (2,))
    except IndexError:
        # File too small to have multiple time stamps; guess +/- 2.5 min
        time_bounds = np.stack([coords[2].data - 0.00174,
                                coords[2].data + 0.00174], axis=2)
    coords[2].bounds = convert_sec_since_to_std_time(time_bounds, MODIS_REFERENCE_TIME)

    return coords
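# A standalone sketch (toy data) of the per-timestamp bounds logic above: each
# unique time gets half the gap to its successor as a symmetric bound, and the
# last time reuses the final gap.
import numpy as np

unique_times = np.array([0.0, 10.0, 30.0])
deltas = unique_times[1:] - unique_times[:-1]              # [10., 20.]
delta_map = {t: d / 2 for t, d in zip(unique_times, deltas)}
delta_map[unique_times[-1]] = deltas[-1] / 2               # last point: reuse final gap
bounds = np.array([[t - delta_map[t], t + delta_map[t]] for t in unique_times])
print(bounds)                                              # [[-5. 5.] [0. 20.] [20. 40.]]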