def get_surface_grouping(self, sounding_id):
    """Classify the surface type (e.g. land vs. water) for a sounding.

    GOSAT and OCO L1B files represent the land type differently, so the
    dataset consulted and the predicate table used depend on the
    instrument. When no usable dataset exists, falls back to a surface
    database lookup (ModisEcoMap) by the sounding's first lat/lon.

    NOTE(review): this duplicates L1B.get_surface_grouping defined later
    in this file; consider removing one copy so fixes cannot diverge.

    Returns the matched surface type name, or None if no predicate in the
    check table matched.
    """
    surface_type = None

    try:
        # GOSAT and OCO L1B files have differing ways of representing the
        # land type
        if self.instrument_name == GOSAT_INST_NAME:
            land_value = self.get_sounding_info('land_fraction', sounding_id)
            land_values_check_dict = LAND_FRACTION_PERCENTAGE
        elif self.instrument_name == OCO_INST_NAME:
            land_value = self.get_sounding_info('land_water_indicator', sounding_id)
            land_values_check_dict = LAND_WATER_INDICATOR
        else:
            raise MissingDataset("Unknown instrument.")

        # First matching predicate determines the surface type
        for type_name, land_check in land_values_check_dict.items():
            if land_check(land_value):
                surface_type = type_name
                break

    except MissingDataset:
        # Fall back to trying to read out of a surface database file
        # Initialize lazily; 'is None' (not '== None') per PEP 8 and to
        # avoid any overloaded equality on the stored object
        if self.surf_type_obj is None:
            self.surf_type_obj = ModisEcoMap()

        # Just use first lat/lon if sounding matches multiple polarizations, etc
        latitude  = self.get_sounding_info('latitude', sounding_id, flatten=True)[0]
        longitude = self.get_sounding_info('longitude', sounding_id, flatten=True)[0]

        surface_type = self.surf_type_obj.get_surface_grouping(latitude, longitude)

    return surface_type
class L1B(SoundingDataFile):
    """Reader for GOSAT and OCO L1B sounding product files.

    The producing instrument is detected from which sounding-id dataset is
    present in the file; all instrument-specific dataset paths are then
    resolved through the module-level lookup tables.
    """

    def __init__(self, filename, mode=None, **kwargs):
        """Open the L1B file and determine the instrument type.

        Raises:
            LookupError: if the file does not match a recognized instrument.
        """
        # Init super-class
        SoundingDataFile.__init__(self, filename, mode, **kwargs)

        # Created lazily by get_surface_grouping() when the file itself
        # has no usable surface-type dataset
        self.surf_type_obj = None

        # Needed internally for selecting correct dataset
        self.instrument_name = self.determine_instrument_name()

        # See if the file type is one we recognize
        if self.instrument_name not in (GOSAT_INST_NAME, OCO_INST_NAME):
            raise LookupError('Unrecognized instrument name detected: %s' % self.instrument_name)

        self._sounding_id_dataset = SOUNDING_ID_DATASET[self.instrument_name]

        self._data_shape_name_dict = SPECIFIC_L1B_INFO_SHAPE_NAMES
        self._default_shape_names = DEFAULT_L1B_INFO_SHAPE_NAMES[self.instrument_name]

    def get_sounding_ids(self, add_polarization=False):
        """Return the sounding ids present in the file.

        For GOSAT files, when add_polarization is True, each file id is
        expanded into one id per polarization channel with the
        polarization letter appended.
        """
        file_sounding_ids = SoundingDataFile.get_sounding_ids(self)

        if add_polarization and self.instrument_name == GOSAT_INST_NAME:
            # Expand each file id into one id per polarization, keeping
            # file order outermost and polarization order innermost
            return ['%d%s' % (curr_id, pol_name)
                    for curr_id in file_sounding_ids
                    for pol_name in GOSAT_POL_ORDER]
        else:
            return file_sounding_ids

    def get_id_dim_names(self):
        """Return dimension names for the sounding id dataset, preferring
        the known per-dataset mapping over the base-class lookup."""
        dflt_dim_names = SOUNDING_ID_DIMENSIONS.get(self._sounding_id_dataset, None)
        # 'is not None' (not '!= None'): identity test per PEP 8
        if dflt_dim_names is not None:
            return dflt_dim_names
        else:
            return SoundingDataFile.get_id_dim_names(self)

    def get_sounding_indexes(self, sounding_id):
        """Find sounding id through bisection, possibly slower than a dict lookup.

        For GOSAT, a trailing polarization letter on the sounding id
        selects that single polarization index; with no letter, all
        polarizations are selected via a slice.
        """
        if self.instrument_name == GOSAT_INST_NAME:
            id_str = str(sounding_id)
            if id_str[-1].upper() in GOSAT_POL_ORDER:
                # Upper-case before .index(): the membership check above
                # accepts lower-case letters, so indexing with the raw
                # character would raise ValueError for e.g. '...p'
                pol_name = id_str[-1].upper()
                sounding_id = id_str[:-1]
                pol_index = GOSAT_POL_ORDER.index(pol_name)
            else:
                # No polarization specified: select all channels
                pol_index = slice(len(GOSAT_POL_ORDER))

            index_tuple = SoundingDataFile.get_sounding_indexes(self, sounding_id)
            # Rebuild with the same (presumably named) tuple class so the
            # polarization index replaces the second element
            index_tuple = index_tuple.__class__(index_tuple[0], pol_index)
        else:
            index_tuple = SoundingDataFile.get_sounding_indexes(self, sounding_id)

        return index_tuple

    def determine_instrument_name(self):
        """Identify the instrument by which sounding-id dataset exists.

        Returns the matching instrument name, or None if no known dataset
        is present in the file.
        """
        for instrument_name, dataset_name in SOUNDING_ID_DATASET.items():
            # 'is not None' instead of '!= None': h5py/numpy objects can
            # overload == elementwise, making the truth test ambiguous
            if self.get(dataset_name, None) is not None:
                return instrument_name
        return None

    def get_info_dataset_name(self, info_name):
        """Looks up the dataset name that matches a given informative short name. The informative name refers
        to a data item that may have a different dataset name between different instruments or might have a
        different name in different versions of the product"""

        # Chained to look first in SOUNDING_INFO_DATASETS then ADDL_INFO_DATASETS
        item_dataset_spec = SOUNDING_INFO_DATASETS.get(info_name.lower(), ADDL_INFO_DATASETS.get(info_name.lower(), None))

        # Check if the info item is undefined or defined per instrument
        if item_dataset_spec is None:
            raise MissingDataset('Could not find dataset for info name: %s' % (info_name))
        elif hasattr(item_dataset_spec, 'get'):
            # A dict-like spec maps instrument name -> dataset name
            item_dataset_name = item_dataset_spec.get(self.instrument_name, None)
        else:
            item_dataset_name = item_dataset_spec

        if item_dataset_name is None or len(item_dataset_name) == 0:
            raise MissingDataset('Dataset name for info item: %s and instrument: %s is empty' % (info_name, self.instrument_name))

        return self.select_valid_dataset(item_dataset_name)

    def get_sounding_data(self, data_name, sounding_id=None, indexes=None, flatten=False, average=None, shape_names=None, **kwargs):
        """Retrieve data for a sounding, optionally averaging over one
        named dimension and/or flattening the result to 1-D.

        Raises:
            AveragingError: if 'average' names a dimension not present in
                the dataset's named shape.
        """
        # Get data from base class, snd_data should be a NamedShapeArray class
        snd_data = SoundingDataFile.get_sounding_data(self, data_name, sounding_id, indexes, **kwargs)

        if average is not None:
            if average not in snd_data.named_shape._fields:
                raise AveragingError('Can not average over dimension: %s for data named: %s which is not in the dataset indexes: %s' % (average, data_name, snd_data.named_shape))
            else:
                # Average on correct dimension or average the whole array because
                # its shape belongs to the one specified
                if hasattr(snd_data, 'shape'):
                    snd_data = numpy.average(snd_data, snd_data.named_shape._fields.index(average))
                else:
                    snd_data = numpy.average(snd_data)

        if flatten:
            snd_data = numpy.ravel(snd_data)

        return snd_data

    def get_sounding_info(self, info_name, sounding_id=None, **kwargs):
        """Return sounding data addressed by an informative short name
        (resolved through get_info_dataset_name)."""
        dataset_name = self.get_info_dataset_name(info_name)
        return self.get_sounding_data(dataset_name, sounding_id, **kwargs)

    def get_sounding_info_dict(self, sounding_id, ignore_missing=False, as_strings=False, **kwargs):
        """Gather all known info items for a sounding into a dict.

        Missing datasets are skipped when ignore_missing is True,
        otherwise MissingDataset propagates. With as_strings, each value
        is flattened and space-joined into a single string.
        """
        info_dict = {}
        for info_name in SOUNDING_INFO_DATASETS:
            try:
                # If problems averaging, retry without averaging
                try:
                    info_data = self.get_sounding_info(info_name, sounding_id, **kwargs)
                except AveragingError:
                    tmp_kwargs = copy.copy(kwargs)
                    tmp_kwargs['average'] = None
                    info_data = self.get_sounding_info(info_name, sounding_id, **tmp_kwargs)

            except MissingDataset:
                # Ignore missing datasets only when requested
                if ignore_missing:
                    continue
                else:
                    raise

            if as_strings:
                if hasattr(info_data, '__iter__'):
                    info_dict[info_name] = ' '.join(str(value) for value in numpy.ravel(info_data))
                else:
                    info_dict[info_name] = str(info_data)
            else:
                info_dict[info_name] = info_data

        return info_dict

    def get_sounding_time(self, sounding_id, **kwargs):
        """Return a tuple of time.struct_time values for the sounding.

        Handles both ISO-like timestamp strings and numeric TAI93 epoch
        seconds, depending on what the time dataset stores.
        """
        # Calculate current time from TAI93 start; never average times
        tmp_kwargs = copy.copy(kwargs)
        tmp_kwargs['average'] = None

        sounding_times = self.get_sounding_data(self.select_valid_dataset(TIME_DATASET), sounding_id, **tmp_kwargs)

        time_structs = []
        for curr_l1b_time in numpy.ravel(sounding_times):
            # isinstance covers both plain and numpy string scalars
            if isinstance(curr_l1b_time, (str, numpy.str_)):

                if len(curr_l1b_time) < TIME_STRING_EXPECT_LEN:
                    raise Exception('Time string: "%s" from file: "%s" does not have the expected format length: %d' % (curr_l1b_time, self.filename, TIME_STRING_EXPECT_LEN))

                # Parse the fixed-width prefix, then add the remaining
                # fractional seconds (trailing 'Z' stripped) separately
                parsed_time = datetime.datetime.strptime(curr_l1b_time[:TIME_STRING_EXPECT_LEN], TIME_STRING_FORMAT)
                unparsed_str = curr_l1b_time[TIME_STRING_EXPECT_LEN:].strip("Z")
                parsed_time = parsed_time + datetime.timedelta(seconds=float(unparsed_str))

                time_structs.append( parsed_time.timetuple() )
            else:
                # Numeric TAI93 seconds: offset by leap seconds, then
                # convert TAI -> UTC
                tai_time = TAI93_START + datetime.timedelta(seconds=(curr_l1b_time + TAI93_LPSECS))
                utc_time = tai64n.tai2utc(tai_time)
                time_structs.append( utc_time.timetuple() )

        return tuple(time_structs)

    def get_radiance_data(self, sounding_id, **kwargs):
        """Return per-band radiance arrays for the sounding as a tuple."""
        radiance_data = []
        for spec_dataset_name in RADIANCE_DATASETS:
            spec_dataset_full = '/%s/%s' % (RADIANCE_GROUP[self.instrument_name], spec_dataset_name)

            band_data = self.get_sounding_data(spec_dataset_full, sounding_id, **kwargs)

            radiance_data.append( band_data )

        return tuple(radiance_data)

    def get_channel_counts(self, sounding_id, **kwargs):
        """Return the number of spectral channels in each band."""
        return tuple( len(band_data[0]) for band_data in self.get_radiance_data(sounding_id, **kwargs) )

    def evaluate_dispersion(self, sounding_id, **kwargs):
        """Evaluate each band's dispersion polynomial over its channel
        indexes (1-based) and return the per-band grids."""
        dispersion_coefs = self.get_sounding_info('dispersion', sounding_id, **kwargs)
        channel_counts   = self.get_channel_counts(sounding_id, **kwargs)

        disp_eval = []
        for band_coefs, band_len in zip(dispersion_coefs, channel_counts):
            # poly1d expects highest-order coefficient first, hence [::-1]
            band_poly = numpy.lib.polynomial.poly1d(band_coefs[::-1])
            disp_eval.append( band_poly(numpy.arange(1, band_len + 1)) )

        return tuple(disp_eval)

    def get_wavenumbers(self, sounding_id, **kwargs):
        """Return per-band wavenumber grids (cm^-1)."""
        if self.instrument_name == GOSAT_INST_NAME:
            # GOSAT dispersion is already in wavenumber space
            return self.evaluate_dispersion(sounding_id, **kwargs)
        else:
            # Convert from wavelength in microns: wn = 1e4 / wl
            return tuple( 1e4 / band_wvl for band_wvl in self.get_wavelengths(sounding_id, **kwargs) )

    def get_wavelengths(self, sounding_id, **kwargs):
        """Return per-band wavelength grids (microns)."""
        if self.instrument_name == OCO_INST_NAME:
            # OCO dispersion is already in wavelength space
            return self.evaluate_dispersion(sounding_id, **kwargs)
        else:
            # Convert from wavenumber in cm^-1: wl = 1e4 / wn
            return tuple( 1e4 / band_wn for band_wn in self.get_wavenumbers(sounding_id, **kwargs) )

    def get_error_data(self, sounding_id, calculate_noise=True, gain_code=None, **kwargs):
        """Return per-band noise estimates for the sounding.

        With calculate_noise False, returns the raw coefficient pairs per
        band instead. gain_code is GOSAT-only; if omitted it is read from
        the file and must agree across polarization channels.

        Raises:
            Exception: if gain_code is given for an OCO file.
            ValueError: if GOSAT gain codes differ across polarizations.
        """
        # Process noise for each band and return
        error_data = []

        if self.instrument_name == OCO_INST_NAME:
            # 'is not None': gain_code has no meaning for OCO
            if gain_code is not None:
                raise Exception('gain_code not used for instrument: %s' % OCO_INST_NAME)

            index_tuple = self.get_sounding_indexes(sounding_id)

            for band_idx, band_radiance in enumerate(self.get_radiance_data(sounding_id, **kwargs)):
                photon_col = self[OCO_SNR_COEF_DATASET][band_idx, index_tuple[1], :, 0]
                bkgrnd_col = self[OCO_SNR_COEF_DATASET][band_idx, index_tuple[1], :, 1]

                if calculate_noise:
                    # Combine photon term (scaled by fraction of maximum
                    # measurable signal) and background term in quadrature
                    tmp = (100.0e0 * band_radiance[:] / MAX_MEAS_SIGNAL[self.instrument_name][band_idx]) * photon_col[:]**2
                    tmp = numpy.sqrt(tmp + bkgrnd_col[:]**2)
                    band_error = (MAX_MEAS_SIGNAL[self.instrument_name][band_idx]/100.0) * tmp
                else:
                    band_error = (photon_col, bkgrnd_col)

                error_data.append(band_error)

        elif self.instrument_name == GOSAT_INST_NAME:

            if gain_code is None:
                gain_code = self.get_sounding_info('gain', sounding_id)
                if hasattr(gain_code, '__iter__'):
                    # All polarization channels must share one gain code
                    if not numpy.all(gain_code == gain_code[0]):
                        raise ValueError('sounding id: %s does not specify polarization name and averaging not enababled or gain codes differ for polarization channels: %s' % (sounding_id, gain_code))
                    else:
                        gain_code = gain_code[0]

            for band_idx, band_name in enumerate(BAND_DATA_NAMES):
                cnv_dataset = GOSAT_CNV_COEF_DATASET[gain_code.strip()] % band_name
                noise_dataset = GOSAT_NOISE_DATASET % band_name

                cnv_col    = self.get_sounding_data(cnv_dataset, sounding_id, **kwargs)
                band_noise = self.get_sounding_data(noise_dataset, sounding_id, **kwargs)

                if calculate_noise:
                    error_data.append( cnv_col[:] * band_noise )
                else:
                    error_data.append( (cnv_col, band_noise) )

        return tuple(error_data)

    def get_build_id(self):
        """Parse the product build id string into a tuple, e.g. (2, 9, 0).

        Non-numeric parts are kept as strings. Returns (0, 0, 0) when the
        build id dataset is absent from the file.
        """
        try:
            b_id_str = self[BUILD_ID_DATASET[self.instrument_name]]
            l1b_build_id = []
            # Strip a leading 'v' then split version components on '.'
            for id_part in b_id_str[0].replace('v','').split('.'):
                if id_part.isdigit():
                    l1b_build_id.append( int(id_part) )
                else:
                    # Keep non-numeric parts (e.g. release tags) as-is
                    l1b_build_id.append( id_part )
        except KeyError:
            # No build id dataset present in this file
            return (0,0,0)

        return tuple(l1b_build_id)

    def get_surface_grouping(self, sounding_id):
        """Classify the surface type (e.g. land vs. water) for a sounding.

        GOSAT and OCO represent land type differently, so the dataset
        consulted depends on the instrument. When no usable dataset
        exists, falls back to a surface database lookup (ModisEcoMap) by
        the sounding's first latitude/longitude. Returns None when no
        predicate in the check table matches.
        """
        surface_type = None

        try:
            # GOSAT and OCO L1B files have differing ways of representing the
            # land type
            if self.instrument_name == GOSAT_INST_NAME:
                land_value = self.get_sounding_info('land_fraction', sounding_id)
                land_values_check_dict = LAND_FRACTION_PERCENTAGE
            elif self.instrument_name == OCO_INST_NAME:
                land_value = self.get_sounding_info('land_water_indicator', sounding_id)
                land_values_check_dict = LAND_WATER_INDICATOR
            else:
                raise MissingDataset("Unknown instrument.")

            # First matching predicate determines the surface type
            for type_name, land_check in land_values_check_dict.items():
                if land_check(land_value):
                    surface_type = type_name
                    break

        except MissingDataset:
            # Fall back to trying to read out of a surface database file
            # Initialize lazily as needed
            if self.surf_type_obj is None:
                self.surf_type_obj = ModisEcoMap()

            # Just use first lat/lon if sounding matches multiple polarizations, etc
            latitude  = self.get_sounding_info('latitude', sounding_id, flatten=True)[0]
            longitude = self.get_sounding_info('longitude', sounding_id, flatten=True)[0]

            surface_type = self.surf_type_obj.get_surface_grouping(latitude, longitude)

        return surface_type