def get_data(sds):
    """
    Read an SD dataset, mask it and apply the generic offset and scaling.

    The '_FillValue' attribute (if present) is handed to ``create_masked_array_for_missing_data``,
    values outside the 'valid_range' attribute (if present) are masked, and the result is passed
    through ``apply_offset_and_scaling`` using the 'add_offset' / 'scale_factor' attributes.

    :param sds: The specific sds instance to read
    :return: The masked and scaled data array.
    """
    from cis.utils import create_masked_array_for_missing_data
    from cis.data_io.netcdf import apply_offset_and_scaling
    import numpy as np

    values = sds.get()
    attrs = sds.attributes()

    # Fill value masking is skipped entirely when the attribute is absent
    fill = attrs.get('_FillValue', None)
    if fill is not None:
        values = create_masked_array_for_missing_data(values, fill)

    # valid_range is unpacked straight into masked_outside as its bounds
    limits = attrs.get('valid_range', None)
    if limits is not None:
        values = np.ma.masked_outside(values, *limits)

    # The warning is unconditional: this reader always uses the generic convention
    logging.warning("Applying standard offset and scaling for dataset - this may not be appropriate for HDF_EOS data!")
    return apply_offset_and_scaling(
        values,
        add_offset=attrs.get('add_offset', 0.0),
        scale_factor=attrs.get('scale_factor', 1.0),
    )
def _get_MODIS_SDS_data(sds):
    """
    Read an SD dataset, mask it and apply the MODIS scaling convention.

    :param sds: The specific sds instance to read
    :return: The data array after fill-value masking, valid_range masking and
     ``_apply_scaling_factor_MODIS`` have been applied.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    raw = sds.get()
    attrs = sds.attributes()

    # Fill value: only applied when the dataset declares one
    fill = attrs.get('_FillValue', None)
    if fill is not None:
        raw = create_masked_array_for_missing_data(raw, fill)

    # Valid range: mask everything outside the declared bounds
    bounds = attrs.get('valid_range', None)
    if bounds is not None:
        logging.debug("Masking all values {} > v > {}.".format(*bounds))
        raw = np.ma.masked_outside(raw, *bounds)

    # Missing attributes fall back to the identity transform (scale 1.0, offset 0.0)
    scale = attrs.get('scale_factor', 1.0)
    shift = attrs.get('add_offset', 0.0)
    return _apply_scaling_factor_MODIS(raw, scale, shift)
def _get_calipso_data(self, sds):
    """
    Read a Calipso SD dataset, mask missing and out-of-range values, then apply the Calipso scaling.

    The fill value is looked up in order of preference: the 'fillvalue' attribute, a table of
    defaults keyed by the 'format' attribute, and finally the '_FillValue' attribute. The
    'valid_range' attribute is parsed from a "low...high" string.

    Arguments:
    sds -- The specific sds instance to read

    Returns:
    The data array after masking and ``_apply_scaling_factor_CALIPSO``.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    default_fills = {
        'Float_32': -9999.0,
        # 'Int_8' : 'See SDS description',
        'Int_16': -9999,
        'Int_32': -9999,
        'UInt_8': -127,
        # 'UInt_16' : 'See SDS description',
        # 'UInt_32' : 'See SDS description',
        'ExtinctionQC Fill Value': 32768,
        'FeatureFinderQC No Features Found': 32767,
        'FeatureFinderQC Fill Value': 65535,
    }

    values = sds.get()
    attrs = sds.attributes()

    # Resolve the fill value: explicit attribute, then per-format default, then '_FillValue'
    fill = attrs.get('fillvalue', None)
    if fill is None:
        fmt = attrs.get('format', None)
        if fmt in default_fills:
            fill = default_fills[fmt]
        else:
            fill = attrs.get('_FillValue', None)

    if fill is not None:
        values = create_masked_array_for_missing_data(values, float(fill))

    # Valid range handling
    range_text = attrs.get('valid_range', None)
    if range_text is not None:
        # Drop commas inside the numbers, split on the "..." separator and convert to the data's dtype
        pieces = range_text.replace(',', '').split("...")
        limits = np.asarray(pieces, dtype=values.dtype)
        # Only a strictly increasing pair is usable; anything else is reported and ignored
        if len(limits) == 2 and limits[0] < limits[1]:
            logging.debug("Masking all values {} > v > {}.".format(*limits))
            values = np.ma.masked_outside(values, *limits)
        else:
            logging.warning("Invalid valid_range: {}. Not masking values.".format(range_text))

    # Offsets and scaling, defaulting to the identity transform
    shift = attrs.get('add_offset', 0)
    scale = attrs.get('scale_factor', 1)
    return self._apply_scaling_factor_CALIPSO(values, scale, shift)
def _get_MODIS_SDS_data(sds):
    """
    Return the contents of an SD dataset after masking and MODIS scaling.

    :param sds: The specific sds instance to read
    :return: The data array, masked using the '_FillValue' and 'valid_range' attributes where
     they are present, then transformed by ``_apply_scaling_factor_MODIS``.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    result = sds.get()
    metadata = sds.attributes()

    # Step 1: fill value
    fill_value = metadata.get('_FillValue', None)
    if fill_value is not None:
        result = create_masked_array_for_missing_data(result, fill_value)

    # Step 2: valid range
    span = metadata.get('valid_range', None)
    if span is not None:
        logging.debug("Masking all values {} > v > {}.".format(*span))
        result = np.ma.masked_outside(result, *span)

    # Step 3: scale factor and offset (identity when the attributes are absent)
    return _apply_scaling_factor_MODIS(
        result,
        metadata.get('scale_factor', 1.0),
        metadata.get('add_offset', 0.0),
    )
def get_data(sds):
    """
    Return the contents of an SD dataset after masking and generic offset/scaling.

    :param sds: The specific sds instance to read
    :return: The data array, masked using the '_FillValue' and 'valid_range' attributes where
     they are present, then transformed by ``apply_offset_and_scaling``.
    """
    from cis.utils import create_masked_array_for_missing_data
    from cis.data_io.netcdf import apply_offset_and_scaling
    import numpy as np

    result = sds.get()
    metadata = sds.attributes()

    # Step 1: fill value
    fill_value = metadata.get('_FillValue', None)
    if fill_value is not None:
        result = create_masked_array_for_missing_data(result, fill_value)

    # Step 2: valid range
    span = metadata.get('valid_range', None)
    if span is not None:
        result = np.ma.masked_outside(result, *span)

    # Step 3: offset and scaling; the warning is emitted on every call
    shift = metadata.get('add_offset', 0.0)
    scale = metadata.get('scale_factor', 1.0)
    logging.warning("Applying standard offset and scaling for dataset - this may not be appropriate for HDF_EOS data!")
    result = apply_offset_and_scaling(result, add_offset=shift, scale_factor=scale)
    return result
def _get_cloudsat_sds_data(self, sds):
    """
    Reads raw data from an SD instance and applies the CloudSat masking and scaling.

    Masking is applied in this order: the '_FillValue' attribute (through
    ``create_masked_array_for_missing_data``), the 'missing' / 'missop' attribute pair, and then
    the 'valid_range' attribute. The 'factor' and 'offset' attributes are finally handed to
    ``_apply_scaling_factor_CLOUDSAT``.

    :param sds: The specific sds instance to read
    :return: The data array after masking and scaling.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    data = sds.get()
    attributes = sds.attributes()

    # First deal with the Fill value
    fill_value = attributes.get('_FillValue', None)
    if fill_value is not None:
        data = create_masked_array_for_missing_data(data, fill_value)

    # TODO: This needs some explicit integration and unit tests
    # Then deal with missing values. 'missop' names the comparison that identifies a missing value.
    missop_fn = {
        '<': np.ma.masked_less,
        '<=': np.ma.masked_less_equal,
        '==': np.ma.masked_equal,
        # Accept the conventional spelling as well as the historical '=>' one
        '>=': np.ma.masked_greater_equal,
        '=>': np.ma.masked_greater_equal,
        '>': np.ma.masked_greater,
    }

    missing = attributes.get('missing', None)
    missop = attributes.get('missop', None)
    if missing is not None and missop is not None:
        try:
            logging.debug("Masking all values v {} {}".format(missop, missing))
            data = missop_fn[missop](data, missing)
        except KeyError:
            # Unknown operator: leave the data unmasked but tell the user which variable was affected
            logging.warning("Unable to identify missop {}, unable to "
                            "mask missing values for {}.".format(missop, sds.info()[0]))

    # Now handle valid range mask
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        # Assume it's the right data type already
        logging.debug("Masking all values {} > v > {}.".format(*valid_range))
        data = np.ma.masked_outside(data, *valid_range)

    # Offsets and scaling. Note the attribute names are 'offset' and 'factor' here.
    offset = attributes.get('offset', 0)
    scale_factor = attributes.get('factor', 1)
    data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

    return data
def _get_MODIS_SDS_data(sds, start=None, count=None, stride=None):
    """
    Reads (a hyperslab of) the raw data from an SD instance and applies MODIS masking and scaling.

    Dimensions omitted from ``start``, ``count`` or ``stride`` are read in full. A negative
    ``count`` entry means "read to the end of that dimension". The caller's lists are never
    modified.

    :param sds: The specific sds instance to read
    :param start: List of indices to start reading from each dimension
    :param count: List of number of data to read from each dimension
    :param stride: List of strides to read from each dimension
    :return: The squeezed data array, masked using the '_FillValue' and 'valid_range' attributes
     where present, then transformed by ``_apply_scaling_factor_MODIS``.
    """
    from cis.utils import create_masked_array_for_missing_data, listify
    from cis.data_io.products.MODIS import _apply_scaling_factor_MODIS
    from numpy.ma import masked_outside

    _, ndim, dim_len, _, _ = sds.info()
    # NOTE(review): pyhdf reports the shape of a rank-1 dataset as a bare integer, not a list
    if not isinstance(dim_len, (list, tuple)):
        dim_len = [dim_len]

    def _padded(values, filler):
        # Copy before padding: listify may hand back the caller's own list, which must not be extended
        values = [] if values is None else list(listify(values))
        return values + [filler] * (ndim - len(values))

    # Assume full read of all omitted dimensions
    start = _padded(start, 0)
    count = _padded(count, -1)
    stride = _padded(stride, 1)

    # Allow lazy notation for "read all". count is a number of samples, so the remaining
    # length has to be divided by the stride (rounding up); for stride 1 this is length - x0.
    count = [
        n if n >= 0 else -((x0 - length) // step)
        for x0, n, length, step in zip(start, count, dim_len, stride)
    ]

    data = sds.get(start, count, stride).squeeze()
    attributes = sds.attributes()

    # Apply Fill Value
    missing_value = attributes.get('_FillValue', None)
    if missing_value is not None:
        data = create_masked_array_for_missing_data(data, missing_value)

    # Check for valid_range
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        data = masked_outside(data, *valid_range)

    # Offsets and scaling.
    add_offset = attributes.get('add_offset', 0.0)
    scale_factor = attributes.get('scale_factor', 1.0)
    data = _apply_scaling_factor_MODIS(data, scale_factor, add_offset)

    return data
def get_calipso_data(self, sds):
    """
    Reads raw data from an SD instance and applies the Calipso fill-value masking and scaling.

    The fill value is looked up in order of preference: the 'fillvalue' attribute, a table of
    defaults keyed by the 'format' attribute, and finally the '_FillValue' attribute. When none
    of these yields a value no masking is attempted.

    Arguments:
    sds -- The specific sds instance to read

    Returns:
    The data array after masking (if a fill value was found) and
    ``apply_scaling_factor_CALIPSO``.
    """
    from cis.utils import create_masked_array_for_missing_data

    calipso_fill_values = {'Float_32': -9999.0,
                           # 'Int_8' : 'See SDS description',
                           'Int_16': -9999,
                           'Int_32': -9999,
                           'UInt_8': -127,
                           # 'UInt_16' : 'See SDS description',
                           # 'UInt_32' : 'See SDS description',
                           'ExtinctionQC Fill Value': 32768,
                           'FeatureFinderQC No Features Found': 32767,
                           'FeatureFinderQC Fill Value': 65535}

    data = sds.get()
    attributes = sds.attributes()

    # Missing data. First try 'fillvalue'
    missing_val = attributes.get('fillvalue', None)
    if missing_val is None:
        try:
            # Now try and lookup the fill value based on the data type
            missing_val = calipso_fill_values[attributes.get('format', None)]
        except KeyError:
            # Last guess
            missing_val = attributes.get('_FillValue', None)

    # Every lookup can come back empty, in which case there is nothing to mask against
    if missing_val is not None:
        data = create_masked_array_for_missing_data(data, missing_val)

    # Offsets and scaling, defaulting to the identity transform
    offset = attributes.get('add_offset', 0)
    scale_factor = attributes.get('scale_factor', 1)
    data = self.apply_scaling_factor_CALIPSO(data, scale_factor, offset)

    return data
def _get_cloudsat_vds_data(self, vds):
    """
    Reads a Vdata variable from file and applies the CloudSat masking and scaling.

    The file is opened for the duration of the call only; the Vdata, the VS interface and the
    file are released even if reading or processing fails.

    :param vds: Object carrying the ``filename`` and ``variable`` to read
    :return: The flattened data array, masked using the 'missing' and 'valid_range' attributes
     where present, then transformed by ``_apply_scaling_factor_CLOUDSAT``.
    :raises IOError: If the file cannot be opened
    """
    from cis.data_io.hdf_vd import _get_attribute_value, HDF, HDF4Error
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    # Track what has been acquired so the finally block only releases what actually exists
    vs = None
    vd = None
    try:
        vs = datafile.vstart()
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

        # create numpy array from data
        data = np.array(data).flatten()

        missing_value = _get_attribute_value(vd, 'missing', None)
        if missing_value is not None:
            data = create_masked_array_for_missing_data(data, missing_value)

        valid_range = _get_attribute_value(vd, "valid_range")
        if valid_range is not None:
            # Assume it's the right data type already
            data = np.ma.masked_outside(data, *valid_range)

        # TODO This probably won't work....
        factor = _get_attribute_value(vd, "factor", 1)
        offset = _get_attribute_value(vd, "offset", 0)
        return self._apply_scaling_factor_CLOUDSAT(data, factor, offset)
    finally:
        # detach and close, in reverse order of acquisition
        if vd is not None:
            vd.detach()
        if vs is not None:
            vs.end()
        datafile.close()
def _get_cloudsat_vds_data(self, vds):
    """
    Reads a Vdata variable from file and applies the CloudSat masking and scaling.

    All HDF handles opened here (file, VS interface, attached Vdata) are released before
    returning, including when an error is raised part-way through.

    :param vds: Object carrying the ``filename`` and ``variable`` to read
    :return: The flattened data array, masked using the 'missing' and 'valid_range' attributes
     where present, then transformed by ``_apply_scaling_factor_CLOUDSAT``.
    :raises IOError: If the file cannot be opened
    """
    from cis.data_io.hdf_vd import _get_attribute_value, HDF, HDF4Error
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = vd = None
    try:
        vs = datafile.vstart()
        vd = vs.attach(variable)

        # Read every record and flatten into a 1-D numpy array
        n_records = vd.inquire()[0]
        data = np.array(vd.read(nRec=n_records)).flatten()

        missing_value = _get_attribute_value(vd, 'missing', None)
        if missing_value is not None:
            data = create_masked_array_for_missing_data(data, missing_value)

        valid_range = _get_attribute_value(vd, "valid_range")
        if valid_range is not None:
            # Assume it's the right data type already
            data = np.ma.masked_outside(data, *valid_range)

        # TODO This probably won't work....
        factor = _get_attribute_value(vd, "factor", 1)
        offset = _get_attribute_value(vd, "offset", 0)
        data = self._apply_scaling_factor_CLOUDSAT(data, factor, offset)
    finally:
        # Release only the handles that were successfully acquired, innermost first
        if vd is not None:
            vd.detach()
        if vs is not None:
            vs.end()
        datafile.close()

    return data
def _get_cloudsat_sds_data(self, sds):
    """
    Reads raw data from an SD instance and applies the CloudSat masking and scaling.

    Masking is applied in this order: the '_FillValue' attribute (through
    ``create_masked_array_for_missing_data``), the 'missing' / 'missop' attribute pair, and then
    the 'valid_range' attribute. The offset and scale factor are not SD attributes: they are read
    from Vdata variables named ``<variable>_add_offset`` and ``<variable>_scale_factor`` in the
    same file, defaulting to 0 and 1 when those cannot be read.

    :param sds: The specific sds instance to read
    :return: The data array after masking and ``_apply_scaling_factor_CLOUDSAT``.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np
    from cis.data_io.hdf_vd import VDS, get_data
    from pyhdf.error import HDF4Error

    data = sds.get()
    attributes = sds.attributes()

    # First deal with the Fill value
    fill_value = attributes.get('_FillValue', None)
    if fill_value is not None:
        data = create_masked_array_for_missing_data(data, fill_value)

    # TODO: This needs some explicit integration and unit tests
    # Then deal with missing values. 'missop' names the comparison that identifies a missing value.
    missop_fn = {'<': np.ma.masked_less,
                 '<=': np.ma.masked_less_equal,
                 '==': np.ma.masked_equal,
                 # Accept the conventional spelling as well as the historical '=>' one
                 '>=': np.ma.masked_greater_equal,
                 '=>': np.ma.masked_greater_equal,
                 '>': np.ma.masked_greater,
                 # TODO Note that this is wrong but seems to be what is meant, for Cloud_Effective_Radius at
                 # least...
                 'ge': np.ma.masked_equal,
                 'eq': np.ma.masked_equal}

    missing = attributes.get('missing', None)
    missop = attributes.get('missop', None)
    if missing is not None and missop is not None:
        try:
            logging.debug("Masking all values v {} {}".format(missop, missing))
            data = missop_fn[missop](data, missing)
        except KeyError:
            logging.warning("Unable to identify missop {}, unable to "
                            "mask missing values for {}.".format(missop, sds.info()[0]))

    # Now handle valid range mask
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        # Assume it's the right data type already
        logging.debug("Masking all values {} > v > {}.".format(*valid_range))
        data = np.ma.masked_outside(data, *valid_range)

    # Offsets and scaling - these come from Vdata variables with the appropriate suffixes.
    # Problems are reported through logging, like every other diagnostic in this function.
    try:
        offset = get_data(VDS(sds._filename, sds._variable + "_add_offset"))[0]
    except HDF4Error:
        logging.warning("Couldn't find offset variable " + sds._variable + "_add_offset")
        offset = 0

    try:
        scale_factor = get_data(VDS(sds._filename, sds._variable + "_scale_factor"))[0]
    except HDF4Error:
        logging.warning("Couldn't find scale factor variable " + sds._variable + "_scale_factor")
        scale_factor = 1

    data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

    return data
def _get_cloudsat_sds_data(self, sds):
    """
    Reads raw data from an SD instance and applies the CloudSat masking and scaling.

    The data is masked using, in order, the '_FillValue' attribute, the 'missing' / 'missop'
    attribute pair and the 'valid_range' attribute. The offset and scale factor are then read
    from the Vdata variables ``<variable>_add_offset`` and ``<variable>_scale_factor`` of the
    same file (falling back to 0 and 1 with a logged warning) and applied with
    ``_apply_scaling_factor_CLOUDSAT``.

    :param sds: The specific sds instance to read
    :return: The data array after masking and scaling.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np
    from cis.data_io.hdf_vd import VDS, get_data
    from pyhdf.error import HDF4Error

    def _vdata_scalar(suffix, description, default):
        # Read the first value of the companion Vdata variable, or fall back to the default
        name = sds._variable + suffix
        try:
            return get_data(VDS(sds._filename, name))[0]
        except HDF4Error:
            logging.warning("Couldn't find {} variable {}".format(description, name))
            return default

    data = sds.get()
    attributes = sds.attributes()

    # First deal with the Fill value
    fill_value = attributes.get('_FillValue', None)
    if fill_value is not None:
        data = create_masked_array_for_missing_data(data, fill_value)

    # TODO: This needs some explicit integration and unit tests
    # Then deal with missing values. 'missop' names the comparison that identifies a missing value.
    missop_fn = {
        '<': np.ma.masked_less,
        '<=': np.ma.masked_less_equal,
        '==': np.ma.masked_equal,
        # Accept the conventional spelling as well as the historical '=>' one
        '>=': np.ma.masked_greater_equal,
        '=>': np.ma.masked_greater_equal,
        '>': np.ma.masked_greater,
        # TODO Note that this is wrong but seems to be what is meant, for Cloud_Effective_Radius at
        # least...
        'ge': np.ma.masked_equal,
        'eq': np.ma.masked_equal
    }

    missing = attributes.get('missing', None)
    missop = attributes.get('missop', None)
    if missing is not None and missop is not None:
        try:
            logging.debug("Masking all values v {} {}".format(
                missop, missing))
            data = missop_fn[missop](data, missing)
        except KeyError:
            logging.warning("Unable to identify missop {}, unable to "
                            "mask missing values for {}.".format(
                                missop, sds.info()[0]))

    # Now handle valid range mask
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        # Assume it's the right data type already
        logging.debug(
            "Masking all values {} > v > {}.".format(*valid_range))
        data = np.ma.masked_outside(data, *valid_range)

    # Offsets and scaling - these come from Vdata variables with the appropriate suffixes
    offset = _vdata_scalar("_add_offset", "offset", 0)
    scale_factor = _vdata_scalar("_scale_factor", "scale factor", 1)

    data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

    return data