def get_surface_grouping(self, sounding_id):
    """Return the surface type name for a sounding.

    First tries the instrument-specific land datasets in the L1B file
    (land fraction for GOSAT, land/water indicator for OCO). If those
    datasets are missing, falls back to a MODIS ecosystem map lookup
    using the sounding's first latitude/longitude.

    Returns None if no land check matches and no fallback is triggered.
    """
    surface_type = None
    try:
        # GOSAT and OCO L1B files have differing ways of representing the
        # land type
        if self.instrument_name == GOSAT_INST_NAME:
            land_value = self.get_sounding_info('land_fraction', sounding_id)
            land_values_check_dict = LAND_FRACTION_PERCENTAGE
        elif self.instrument_name == OCO_INST_NAME:
            land_value = self.get_sounding_info('land_water_indicator', sounding_id)
            land_values_check_dict = LAND_WATER_INDICATOR
        else:
            raise MissingDataset("Unknown instrument.")

        # First matching predicate wins
        for type_name, land_check in land_values_check_dict.items():
            if land_check(land_value):
                surface_type = type_name
                break
    except MissingDataset:
        # Fall back to trying to read out of a surface database file
        # Initialize lazily as needed (fixed: identity comparison with None,
        # not ==, which may be overloaded by array-like objects)
        if self.surf_type_obj is None:
            self.surf_type_obj = ModisEcoMap()

        # Just use first lat/lon if sounding matches multiple polarizations, etc
        latitude = self.get_sounding_info('latitude', sounding_id, flatten=True)[0]
        longitude = self.get_sounding_info('longitude', sounding_id, flatten=True)[0]

        surface_type = self.surf_type_obj.get_surface_grouping(latitude, longitude)

    return surface_type
class L1B(SoundingDataFile):
    """Reader for GOSAT and OCO Level 1B product files.

    Detects which instrument produced the file from the datasets present,
    then maps instrument-agnostic "info" short names onto the
    instrument-specific dataset paths so callers can query soundings
    uniformly.
    """

    def __init__(self, filename, mode=None, **kwargs):
        """Open the L1B file and determine the instrument type.

        Raises LookupError if the file does not look like a GOSAT or OCO
        L1B product.
        """
        # Init super-class
        SoundingDataFile.__init__(self, filename, mode, **kwargs)

        # Lazily created surface database object, see get_surface_grouping
        self.surf_type_obj = None

        # Needed internally for selecting correct dataset
        self.instrument_name = self.determine_instrument_name()

        # See if the file type is one we recognize
        if self.instrument_name not in (GOSAT_INST_NAME, OCO_INST_NAME):
            raise LookupError('Unrecognized instrument name detected: %s' % self.instrument_name)

        self._sounding_id_dataset = SOUNDING_ID_DATASET[self.instrument_name]
        self._data_shape_name_dict = SPECIFIC_L1B_INFO_SHAPE_NAMES
        self._default_shape_names = DEFAULT_L1B_INFO_SHAPE_NAMES[self.instrument_name]

    def get_sounding_ids(self, add_polarization=False):
        """Return the sounding ids in the file.

        For GOSAT, when add_polarization is True each id is expanded into
        one id per polarization (e.g. '...P', '...S') as strings; otherwise
        the ids are returned as read from the file.
        """
        file_sounding_ids = SoundingDataFile.get_sounding_ids(self)

        if add_polarization and self.instrument_name == GOSAT_INST_NAME:
            # One id string per (sounding, polarization) combination
            return ['%d%s' % (curr_id, pol_name)
                    for curr_id in file_sounding_ids
                    for pol_name in GOSAT_POL_ORDER]
        else:
            return file_sounding_ids

    def get_id_dim_names(self):
        """Return the dimension names of the sounding id dataset.

        Uses the static table for known datasets, falling back to the
        base class lookup otherwise.
        """
        dflt_dim_names = SOUNDING_ID_DIMENSIONS.get(self._sounding_id_dataset, None)
        if dflt_dim_names is not None:
            return dflt_dim_names
        else:
            return SoundingDataFile.get_id_dim_names(self)

    def get_sounding_indexes(self, sounding_id):
        """Find sounding id through bisection, possibly slower than a dict lookup.

        GOSAT sounding ids may carry a trailing polarization letter; when
        present, the returned index tuple selects that single polarization,
        otherwise it slices over all polarizations.
        """
        if self.instrument_name == GOSAT_INST_NAME:
            if str(sounding_id)[-1].upper() in GOSAT_POL_ORDER:
                # Fixed: normalize case so GOSAT_POL_ORDER.index() agrees with
                # the (upper-cased) membership test above; previously a
                # lower-case suffix passed the check but raised ValueError here
                pol_name = str(sounding_id)[-1].upper()
                sounding_id = str(sounding_id)[:-1]
                pol_index = GOSAT_POL_ORDER.index(pol_name)
            else:
                # No polarization suffix: select all polarizations
                pol_index = slice(len(GOSAT_POL_ORDER))
            index_tuple = SoundingDataFile.get_sounding_indexes(self, sounding_id)
            # Rebuild as the same (named) tuple type with the polarization index
            index_tuple = index_tuple.__class__(index_tuple[0], pol_index)
        else:
            index_tuple = SoundingDataFile.get_sounding_indexes(self, sounding_id)
        return index_tuple

    def determine_instrument_name(self):
        """Determine the instrument name from which sounding id dataset exists.

        Returns None when no known sounding id dataset is present.
        """
        for instrument_name, dataset_name in SOUNDING_ID_DATASET.items():
            # Identity check: dataset objects may overload == elementwise
            if self.get(dataset_name, None) is not None:
                return instrument_name
        return None

    def get_info_dataset_name(self, info_name):
        """Looks up the dataset name that matches a given informative short
        name. The informative name refers to a data item that may have a
        different dataset name between different instruments or might have
        a different name in different versions of the product.

        Raises MissingDataset when no dataset is defined for the name (or
        for this instrument).
        """
        # Chained to look first in SOUNDING_INFO_DATASETS then ADDL_INFO_DATASET
        item_dataset_spec = SOUNDING_INFO_DATASETS.get(info_name.lower(),
                                                       ADDL_INFO_DATASETS.get(info_name.lower(), None))

        # Check if the info item undefined or defined for a specific instrument
        if item_dataset_spec is None:
            raise MissingDataset('Could not find dataset for info name: %s' % (info_name))
        elif hasattr(item_dataset_spec, 'get'):
            # Spec is a per-instrument mapping
            item_dataset_name = item_dataset_spec.get(self.instrument_name, None)
        else:
            item_dataset_name = item_dataset_spec

        if item_dataset_name is None or len(item_dataset_name) == 0:
            raise MissingDataset('Dataset name for info item: %s and instrument: %s is empty' % (info_name, self.instrument_name))

        return self.select_valid_dataset(item_dataset_name)

    def get_sounding_data(self, data_name, sounding_id=None, indexes=None, flatten=False, average=None, shape_names=None, **kwargs):
        """Retrieve dataset values for a sounding.

        average -- name of a dimension to average over (raises
                   AveragingError if the dataset does not have it)
        flatten -- ravel the result to 1-D
        shape_names is accepted for interface compatibility but unused here.
        """
        # Get data from base class, snd_data should be a NamedShapeArray class
        snd_data = SoundingDataFile.get_sounding_data(self, data_name, sounding_id, indexes, **kwargs)

        if average is not None:
            if average not in snd_data.named_shape._fields:
                raise AveragingError('Can not average over dimension: %s for data named: %s which is not in the dataset indexes: %s' % (average, data_name, snd_data.named_shape))
            else:
                # Average on correct dimension or average the whole array because
                # its shape belongs to the one specified
                if hasattr(snd_data, 'shape'):
                    snd_data = numpy.average(snd_data, snd_data.named_shape._fields.index(average))
                else:
                    snd_data = numpy.average(snd_data)

        if flatten:
            snd_data = numpy.ravel(snd_data)

        return snd_data

    def get_sounding_info(self, info_name, sounding_id=None, **kwargs):
        """Retrieve data for an informative short name (see get_info_dataset_name)."""
        dataset_name = self.get_info_dataset_name(info_name)
        out_sounding_info = self.get_sounding_data(dataset_name, sounding_id, **kwargs)
        return out_sounding_info

    def get_sounding_info_dict(self, sounding_id, ignore_missing=False, as_strings=False, **kwargs):
        """Collect all known info items for a sounding into a dict.

        ignore_missing -- skip (instead of raise on) items whose dataset is absent
        as_strings     -- convert each value to a space-joined string
        """
        info_dict = {}
        for info_name in SOUNDING_INFO_DATASETS.keys():
            try:
                # If problems averaging, try without it
                try:
                    info_data = self.get_sounding_info(info_name, sounding_id, **kwargs)
                except AveragingError:
                    tmp_kwargs = copy.copy(kwargs)
                    tmp_kwargs['average'] = None
                    info_data = self.get_sounding_info(info_name, sounding_id, **tmp_kwargs)
            except MissingDataset:
                # Ignore missing datasets
                if ignore_missing:
                    continue
                else:
                    raise

            if as_strings:
                if hasattr(info_data, '__iter__'):
                    info_str = ' '.join([str(value) for value in numpy.ravel(info_data)])
                else:
                    info_str = str(info_data)
                info_dict[info_name] = info_str
            else:
                info_dict[info_name] = info_data

        return info_dict

    def get_sounding_time(self, sounding_id, **kwargs):
        """Return a tuple of struct_time objects for the sounding's time values.

        Handles both string timestamps (parsed against TIME_STRING_FORMAT,
        with fractional seconds after the expected length) and numeric
        TAI93 offsets (converted to UTC).
        """
        # Calculate current time from TAI93 start; never average time values
        tmp_kwargs = copy.copy(kwargs)
        tmp_kwargs['average'] = None
        sounding_times = self.get_sounding_data(self.select_valid_dataset(TIME_DATASET), sounding_id, **tmp_kwargs)

        time_structs = []
        for curr_l1b_time in numpy.ravel(sounding_times):
            if isinstance(curr_l1b_time, (str, numpy.str_)):
                if len(curr_l1b_time) < TIME_STRING_EXPECT_LEN:
                    raise Exception('Time string: "%s" from file: "%s" does not have the expected format length: %d' % (curr_l1b_time, self.filename, TIME_STRING_EXPECT_LEN))
                parsed_time = datetime.datetime.strptime(curr_l1b_time[:TIME_STRING_EXPECT_LEN], TIME_STRING_FORMAT)
                # Remaining characters hold fractional seconds, e.g. ".123Z"
                unparsed_str = curr_l1b_time[TIME_STRING_EXPECT_LEN:].strip("Z")
                parsed_time = parsed_time + datetime.timedelta(seconds=float(unparsed_str))
                time_structs.append(parsed_time.timetuple())
            else:
                # Numeric value: seconds since TAI93 epoch, plus leap seconds
                tai_time = TAI93_START + datetime.timedelta(seconds=(curr_l1b_time + TAI93_LPSECS))
                utc_time = tai64n.tai2utc(tai_time)
                time_structs.append(utc_time.timetuple())

        return tuple(time_structs)

    def get_radiance_data(self, sounding_id, **kwargs):
        """Return a tuple with the radiance array for each band."""
        radiance_data = []
        for spec_dataset_name in RADIANCE_DATASETS:
            spec_dataset_full = '/%s/%s' % (RADIANCE_GROUP[self.instrument_name], spec_dataset_name)
            band_data = self.get_sounding_data(spec_dataset_full, sounding_id, **kwargs)
            radiance_data.append(band_data)
        return tuple(radiance_data)

    def get_channel_counts(self, sounding_id, **kwargs):
        """Return a tuple with the number of spectral channels per band."""
        return tuple([len(band_data[0]) for band_data in self.get_radiance_data(sounding_id, **kwargs)])

    def evaluate_dispersion(self, sounding_id, **kwargs):
        """Evaluate the dispersion polynomial per band over channel indexes 1..N."""
        dispersion_coefs = self.get_sounding_info('dispersion', sounding_id, **kwargs)
        channel_counts = self.get_channel_counts(sounding_id, **kwargs)

        disp_eval = []
        for band_coefs, band_len in zip(dispersion_coefs, channel_counts):
            # poly1d expects highest-order coefficient first, hence the reverse
            band_poly = numpy.lib.polynomial.poly1d(band_coefs[::-1])
            disp_eval.append(band_poly(numpy.arange(1, band_len + 1)))
        return tuple(disp_eval)

    def get_wavenumbers(self, sounding_id, **kwargs):
        """Return per-band wavenumber grids (GOSAT dispersion is in wavenumbers)."""
        if self.instrument_name == GOSAT_INST_NAME:
            return self.evaluate_dispersion(sounding_id, **kwargs)
        else:
            # Convert from wavelength (microns) to wavenumber (cm^-1)
            return tuple([1e4 / band_wvl for band_wvl in self.get_wavelengths(sounding_id, **kwargs)])

    def get_wavelengths(self, sounding_id, **kwargs):
        """Return per-band wavelength grids (OCO dispersion is in wavelengths)."""
        if self.instrument_name == OCO_INST_NAME:
            return self.evaluate_dispersion(sounding_id, **kwargs)
        else:
            # Convert from wavenumber (cm^-1) to wavelength (microns)
            return tuple([1e4 / band_wn for band_wn in self.get_wavenumbers(sounding_id, **kwargs)])

    def get_error_data(self, sounding_id, calculate_noise=True, gain_code=None, **kwargs):
        """Return per-band noise data.

        calculate_noise -- when True, return computed noise arrays; when
                           False, return the raw coefficient pairs per band
        gain_code       -- GOSAT only; read from the file when not given
        """
        # Process noise for each band and return
        error_data = []
        if self.instrument_name == OCO_INST_NAME:
            if gain_code is not None:
                raise Exception('gain_code not used for instrument: %s' % OCO_INST_NAME)

            index_tuple = self.get_sounding_indexes(sounding_id)
            for band_idx, band_radiance in enumerate(self.get_radiance_data(sounding_id, **kwargs)):
                # SNR coefficient dataset stores photon term at [...,0] and
                # background term at [...,1]
                photon_col = self[OCO_SNR_COEF_DATASET][band_idx, index_tuple[1], :, 0]
                bkgrnd_col = self[OCO_SNR_COEF_DATASET][band_idx, index_tuple[1], :, 1]
                if calculate_noise:
                    tmp = (100.0e0 * band_radiance[:] / MAX_MEAS_SIGNAL[self.instrument_name][band_idx]) * photon_col[:]**2
                    tmp = numpy.sqrt(tmp + bkgrnd_col[:]**2)
                    band_error = (MAX_MEAS_SIGNAL[self.instrument_name][band_idx] / 100.0) * tmp
                else:
                    band_error = (photon_col, bkgrnd_col)
                error_data.append(band_error)
        elif self.instrument_name == GOSAT_INST_NAME:
            if gain_code is None:
                gain_code = self.get_sounding_info('gain', sounding_id)
                if hasattr(gain_code, '__iter__'):
                    # All polarization channels must share a gain code when
                    # the sounding id does not select a single polarization
                    if not numpy.all(gain_code == gain_code[0]):
                        raise ValueError('sounding id: %s does not specify polarization name and averaging not enabled or gain codes differ for polarization channels: %s' % (sounding_id, gain_code))
                    else:
                        gain_code = gain_code[0]

            for band_idx, band_name in enumerate(BAND_DATA_NAMES):
                cnv_dataset = GOSAT_CNV_COEF_DATASET[gain_code.strip()] % band_name
                noise_dataset = GOSAT_NOISE_DATASET % band_name

                cnv_col = self.get_sounding_data(cnv_dataset, sounding_id, **kwargs)
                band_noise = self.get_sounding_data(noise_dataset, sounding_id, **kwargs)

                if calculate_noise:
                    error_data.append(cnv_col[:] * band_noise)
                else:
                    error_data.append((cnv_col, band_noise))

        return tuple(error_data)

    def get_build_id(self):
        """Parse the product build id (e.g. 'v2.10.3') into a tuple.

        Numeric components become ints; non-numeric parts are kept as
        strings. Returns (0, 0, 0) when the dataset is absent.
        """
        try:
            b_id_str = self[BUILD_ID_DATASET[self.instrument_name]]
            l1b_build_id = []
            for id_part in b_id_str[0].replace('v', '').split('.'):
                if id_part.isdigit():
                    l1b_build_id.append(int(id_part))
                else:
                    l1b_build_id.append(id_part)
        except KeyError:
            # Dataset not present in this file/product version
            return (0, 0, 0)

        return tuple(l1b_build_id)

    def get_surface_grouping(self, sounding_id):
        """Return the surface type name for a sounding.

        First tries the instrument-specific land datasets in the L1B file;
        falls back to a MODIS ecosystem map lookup by latitude/longitude
        when those datasets are missing.
        """
        surface_type = None
        try:
            # GOSAT and OCO L1B files have differing ways of representing the
            # land type
            if self.instrument_name == GOSAT_INST_NAME:
                land_value = self.get_sounding_info('land_fraction', sounding_id)
                land_values_check_dict = LAND_FRACTION_PERCENTAGE
            elif self.instrument_name == OCO_INST_NAME:
                land_value = self.get_sounding_info('land_water_indicator', sounding_id)
                land_values_check_dict = LAND_WATER_INDICATOR
            else:
                raise MissingDataset("Unknown instrument.")

            # First matching predicate wins
            for type_name, land_check in land_values_check_dict.items():
                if land_check(land_value):
                    surface_type = type_name
                    break
        except MissingDataset:
            # Fall back to trying to read out of a surface database file
            # Initialize lazily as needed
            if self.surf_type_obj is None:
                self.surf_type_obj = ModisEcoMap()

            # Just use first lat/lon if sounding matches multiple polarizations, etc
            latitude = self.get_sounding_info('latitude', sounding_id, flatten=True)[0]
            longitude = self.get_sounding_info('longitude', sounding_id, flatten=True)[0]

            surface_type = self.surf_type_obj.get_surface_grouping(latitude, longitude)

        return surface_type