コード例 #1
0
def get_data(sds):
    """
    Read an SD dataset, mask invalid values and apply offset/scaling.

    The dataset attributes ``_FillValue``, ``valid_range``, ``add_offset`` and
    ``scale_factor`` are honoured when present. A warning is always logged,
    because the standard offset/scale convention used here may not suit
    HDF-EOS data.

    :param sds: The specific sds instance to read
    :return: The value returned by ``apply_offset_and_scaling`` for the
        (possibly masked) data.
    """
    from cis.utils import create_masked_array_for_missing_data
    from cis.data_io.netcdf import apply_offset_and_scaling
    import numpy as np

    values = sds.get()
    attrs = sds.attributes()

    # Mask the fill value, if the dataset declares one.
    fill = attrs.get('_FillValue')
    if fill is not None:
        values = create_masked_array_for_missing_data(values, fill)

    # Mask everything lying outside the declared valid range.
    limits = attrs.get('valid_range')
    if limits is not None:
        values = np.ma.masked_outside(values, *limits)

    # Missing offset / scale attributes fall back to the identity transform.
    offset = attrs.get('add_offset', 0.0)
    scale = attrs.get('scale_factor', 1.0)
    logging.warning(
        "Applying standard offset and scaling for dataset - this may not be appropriate for HDF_EOS data!"
    )
    return apply_offset_and_scaling(values, add_offset=offset, scale_factor=scale)
コード例 #2
0
ファイル: MODIS.py プロジェクト: tommibergman/cis
def _get_MODIS_SDS_data(sds):
    """
    Read a MODIS SD dataset, mask invalid values and apply MODIS scaling.

    ``_FillValue`` and ``valid_range`` attributes are used for masking when
    present; ``scale_factor`` and ``add_offset`` (defaulting to 1.0 and 0.0)
    are handed to ``_apply_scaling_factor_MODIS``.

    :param sds: The specific sds instance to read
    :return: The value returned by ``_apply_scaling_factor_MODIS`` for the
        (possibly masked) data.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    values = sds.get()
    attrs = sds.attributes()

    # Mask the declared fill value, if any.
    fill = attrs.get('_FillValue')
    if fill is not None:
        values = create_masked_array_for_missing_data(values, fill)

    # Mask values outside the declared valid range, if any.
    bounds = attrs.get('valid_range')
    if bounds is not None:
        logging.debug("Masking all values {} > v > {}.".format(*bounds))
        values = np.ma.masked_outside(values, *bounds)

    # Missing offset / scale attributes fall back to the identity transform.
    offset = attrs.get('add_offset', 0.0)
    scale = attrs.get('scale_factor', 1.0)
    return _apply_scaling_factor_MODIS(values, scale, offset)
コード例 #3
0
    def _get_calipso_data(self, sds):
        """
        Read a Calipso SD dataset, mask invalid values and apply Calipso scaling.

        The fill value is taken from the first of: the ``fillvalue`` attribute,
        a built-in table keyed on the ``format`` attribute, or the
        ``_FillValue`` attribute. A ``valid_range`` attribute of the form
        ``"low...high"`` is used for masking only when it yields exactly two
        numbers with low < high.

        Returns:
            The value returned by ``self._apply_scaling_factor_CALIPSO`` for the
            (possibly masked) data.

        Arguments:
            sds        -- The specific sds instance to read

        """
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np

        # Default fill values keyed on the dataset's 'format' attribute.
        calipso_fill_values = {
            'Float_32': -9999.0,
            # 'Int_8' : 'See SDS description',
            'Int_16': -9999,
            'Int_32': -9999,
            'UInt_8': -127,
            # 'UInt_16' : 'See SDS description',
            # 'UInt_32' : 'See SDS description',
            'ExtinctionQC Fill Value': 32768,
            'FeatureFinderQC No Features Found': 32767,
            'FeatureFinderQC Fill Value': 65535,
        }

        values = sds.get()
        attrs = sds.attributes()

        # Resolve the fill value: explicit attribute, then the per-format
        # table, then the conventional '_FillValue' attribute.
        fill = attrs.get('fillvalue')
        if fill is None:
            fmt = attrs.get('format')
            if fmt in calipso_fill_values:
                fill = calipso_fill_values[fmt]
            else:
                fill = attrs.get('_FillValue')

        if fill is not None:
            values = create_masked_array_for_missing_data(values, float(fill))

        # Valid-range masking.
        range_text = attrs.get('valid_range')
        if range_text is not None:
            # Drop thousands separators, split on '...' and convert to the data's dtype.
            pieces = range_text.replace(',', '').split("...")
            bounds = np.asarray(pieces, dtype=values.dtype)
            # Some valid_range attributes hold a single value; those are skipped.
            usable = (len(bounds) == 2) and bounds[0] < bounds[1]
            if not usable:
                logging.warning("Invalid valid_range: {}. Not masking values.".format(range_text))
            else:
                logging.debug("Masking all values {} > v > {}.".format(*bounds))
                values = np.ma.masked_outside(values, *bounds)

        # Missing offset / scale attributes fall back to the identity transform.
        shift = attrs.get('add_offset', 0)
        scale = attrs.get('scale_factor', 1)
        return self._apply_scaling_factor_CALIPSO(values, scale, shift)
コード例 #4
0
ファイル: MODIS.py プロジェクト: cedadev/cis
def _get_MODIS_SDS_data(sds):
    """
    Load one MODIS SD dataset with masking and MODIS-specific scaling applied.

    Masking uses the ``_FillValue`` and ``valid_range`` attributes when the
    dataset carries them. Scaling is delegated to
    ``_apply_scaling_factor_MODIS`` with ``scale_factor`` (default 1.0) and
    ``add_offset`` (default 0.0).

    :param sds: The specific sds instance to read
    :return: The value returned by ``_apply_scaling_factor_MODIS``.
    """
    from cis.utils import create_masked_array_for_missing_data
    import numpy as np

    raw = sds.get()
    metadata = sds.attributes()

    # Fill value handling.
    fill_value = metadata.get('_FillValue')
    masked = raw if fill_value is None else create_masked_array_for_missing_data(raw, fill_value)

    # Valid range handling.
    span = metadata.get('valid_range')
    if span is not None:
        logging.debug("Masking all values {} > v > {}.".format(*span))
        masked = np.ma.masked_outside(masked, *span)

    # Scaling; absent attributes mean "no offset, unit scale".
    return _apply_scaling_factor_MODIS(
        masked,
        metadata.get('scale_factor', 1.0),
        metadata.get('add_offset', 0.0),
    )
コード例 #5
0
ファイル: hdf_sd.py プロジェクト: cedadev/cis
def get_data(sds):
    """
    Load one SD dataset with masking and standard offset/scaling applied.

    Masking uses the ``_FillValue`` and ``valid_range`` attributes when the
    dataset carries them. ``add_offset`` (default 0.0) and ``scale_factor``
    (default 1.0) are passed to ``apply_offset_and_scaling``; a warning is
    logged on every call since that convention may not be right for HDF-EOS
    data.

    :param sds: The specific sds instance to read
    :return: The value returned by ``apply_offset_and_scaling``.
    """
    from cis.utils import create_masked_array_for_missing_data
    from cis.data_io.netcdf import apply_offset_and_scaling
    import numpy as np

    raw = sds.get()
    metadata = sds.attributes()

    # Fill value handling.
    fill_value = metadata.get('_FillValue')
    masked = raw if fill_value is None else create_masked_array_for_missing_data(raw, fill_value)

    # Valid range handling.
    span = metadata.get('valid_range')
    if span is not None:
        masked = np.ma.masked_outside(masked, *span)

    # Scaling; absent attributes mean "no offset, unit scale".
    scaling = {
        'add_offset': metadata.get('add_offset', 0.0),
        'scale_factor': metadata.get('scale_factor', 1.0),
    }
    logging.warning("Applying standard offset and scaling for dataset - this may not be appropriate for HDF_EOS data!")
    return apply_offset_and_scaling(masked, **scaling)
コード例 #6
0
ファイル: cloudsat.py プロジェクト: tommibergman/cis
    def _get_cloudsat_sds_data(self, sds):
        """
        Read a CloudSat SD dataset, mask invalid values and apply CloudSat scaling.

        Three masking steps are applied in order, each only when the relevant
        attributes exist: the ``_FillValue``, the ``missing``/``missop`` pair
        (a value plus a comparison operator) and the ``valid_range``. Scaling
        uses the ``factor`` and ``offset`` attributes (defaults 1 and 0).

        :param sds: The specific sds instance to read
        :return: The value returned by ``self._apply_scaling_factor_CLOUDSAT``.
        """
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np
        values = sds.get()
        attrs = sds.attributes()

        # Step 1: fill value.
        fill = attrs.get('_FillValue')
        if fill is not None:
            values = create_masked_array_for_missing_data(values, fill)

        # TODO: This needs some explicit integration and unit tests
        # Step 2: 'missing' value combined with the 'missop' comparison operator.
        maskers = {
            '<': np.ma.masked_less,
            '<=': np.ma.masked_less_equal,
            '==': np.ma.masked_equal,
            '=>': np.ma.masked_greater_equal,
            '>': np.ma.masked_greater,
        }

        missing = attrs.get('missing')
        missop = attrs.get('missop')
        if not (missing is None or missop is None):
            try:
                logging.debug("Masking all values v {} {}".format(missop, missing))
                values = maskers[missop](values, missing)
            except KeyError:
                # Unknown operator: leave the data unmasked for this step.
                dataset_name = sds.info()[0]
                logging.warning(
                    "Unable to identify missop {}, unable to mask missing values for {}.".format(
                        missop, dataset_name))

        # Step 3: valid range (assumed to already have the right data type).
        limits = attrs.get('valid_range')
        if limits is not None:
            logging.debug("Masking all values {} > v > {}.".format(*limits))
            values = np.ma.masked_outside(values, *limits)

        # Scaling; absent attributes mean "no offset, unit factor".
        shift = attrs.get('offset', 0)
        factor = attrs.get('factor', 1)
        return self._apply_scaling_factor_CLOUDSAT(values, factor, shift)
コード例 #7
0
def _get_MODIS_SDS_data(sds, start=None, count=None, stride=None):
    """
    Read a (sub)section of a MODIS SD dataset, mask invalid values and apply scaling.

    ``start``, ``count`` and ``stride`` may each be shorter than the dataset
    rank; omitted trailing dimensions are read in full. A negative ``count``
    entry means "read to the end of that dimension". The caller's sequences
    are never modified. Singleton dimensions are squeezed out of the result.

    :param sds: The specific sds instance to read
    :param start: List of indices to start reading from each dimension
    :param count: List of number of data to read from each dimension
    :param stride: List of strides to read from each dimension
    :return: The value returned by ``_apply_scaling_factor_MODIS`` for the
        (possibly masked) data.
    """
    from cis.utils import create_masked_array_for_missing_data, listify
    from cis.data_io.products.MODIS import _apply_scaling_factor_MODIS
    from numpy.ma import masked_outside

    # Work on copies: the padding below extends these lists in place, and
    # listify may hand back the caller's own list object (assumption - its
    # implementation is not visible here), which must not be altered.
    start = [] if start is None else list(listify(start))
    count = [] if count is None else list(listify(count))
    stride = [] if stride is None else list(listify(stride))
    _, ndim, dim_len, _, _ = sds.info()

    # NOTE(review): pyhdf reports the shape of a rank-1 dataset as a bare
    # integer rather than a list; normalise so the zip below works - confirm
    # this also holds for the sds wrapper used here.
    if not hasattr(dim_len, '__iter__'):
        dim_len = [dim_len]

    # Assume full read of all omitted dimensions
    start += [0] * (ndim - len(start))
    count += [-1] * (ndim - len(count))
    stride += [1] * (ndim - len(stride))

    # Allow lazy notation for "read all": a negative count becomes the number
    # of elements between the start index and the end of the dimension.
    count = [
        n if n >= 0 else size - first
        for first, n, size in zip(start, count, dim_len)
    ]

    data = sds.get(start, count, stride).squeeze()
    attributes = sds.attributes()

    # Apply Fill Value
    missing_value = attributes.get('_FillValue', None)
    if missing_value is not None:
        data = create_masked_array_for_missing_data(data, missing_value)

    # Check for valid_range
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        data = masked_outside(data, *valid_range)

    # Offsets and scaling; absent attributes mean "no offset, unit scale".
    add_offset = attributes.get('add_offset', 0.0)
    scale_factor = attributes.get('scale_factor', 1.0)
    data = _apply_scaling_factor_MODIS(data, scale_factor, add_offset)

    return data
コード例 #8
0
ファイル: caliop.py プロジェクト: cpaulik/cis
    def get_calipso_data(self, sds):
        """
        Read a Calipso SD dataset, mask its fill value and apply Calipso scaling.

        The fill value is taken from the first of: the ``fillvalue`` attribute,
        a built-in table keyed on the ``format`` attribute, or the
        ``_FillValue`` attribute.

        Returns:
            The value returned by ``self.apply_scaling_factor_CALIPSO`` for the
            masked data.

        Arguments:
            sds        -- The specific sds instance to read

        """
        from cis.utils import create_masked_array_for_missing_data

        # Default fill values keyed on the dataset's 'format' attribute.
        calipso_fill_values = {
            'Float_32': -9999.0,
            # 'Int_8' : 'See SDS description',
            'Int_16': -9999,
            'Int_32': -9999,
            'UInt_8': -127,
            # 'UInt_16' : 'See SDS description',
            # 'UInt_32' : 'See SDS description',
            'ExtinctionQC Fill Value': 32768,
            'FeatureFinderQC No Features Found': 32767,
            'FeatureFinderQC Fill Value': 65535,
        }

        values = sds.get()
        attrs = sds.attributes()

        # Resolve the fill value: explicit attribute, then the per-format
        # table, then the conventional '_FillValue' attribute.
        fill = attrs.get('fillvalue')
        if fill is None:
            fmt = attrs.get('format')
            if fmt in calipso_fill_values:
                fill = calipso_fill_values[fmt]
            else:
                fill = attrs.get('_FillValue')

        # NOTE(review): the helper is called even when no fill value was found
        # (fill is None) - confirm it copes with a None missing value.
        values = create_masked_array_for_missing_data(values, fill)

        # Missing offset / scale attributes fall back to the identity transform.
        shift = attrs.get('add_offset', 0)
        scale = attrs.get('scale_factor', 1)
        return self.apply_scaling_factor_CALIPSO(values, scale, shift)
コード例 #9
0
    def _get_cloudsat_vds_data(self, vds):
        """
        Read a CloudSat Vdata variable, mask invalid values and apply scaling.

        The file named by ``vds.filename`` is opened with ``HDF``, the Vdata
        named by ``vds.variable`` is attached, and the number of records
        reported by ``vd.inquire()[0]`` is read into a flattened numpy array.
        The ``missing`` and ``valid_range`` attributes are used for masking when
        present, and ``factor``/``offset`` (defaults 1 and 0) are passed to
        ``self._apply_scaling_factor_CLOUDSAT``.

        The Vdata, the V-interface and the file are always released, even when
        reading or post-processing raises.

        :param vds: Object exposing ``filename`` and ``variable`` attributes
        :return: The value returned by ``self._apply_scaling_factor_CLOUDSAT``
        :raises IOError: If the HDF file cannot be opened
        """
        from cis.data_io.hdf_vd import _get_attribute_value, HDF, HDF4Error
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np

        # get file and variable reference from tuple
        filename = vds.filename
        variable = vds.variable

        try:
            datafile = HDF(filename)
        except HDF4Error as e:
            raise IOError(e)

        # Nested try/finally blocks release each handle in reverse order of
        # acquisition, whatever happens while the data is being processed.
        try:
            vs = datafile.vstart()
            try:
                vd = vs.attach(variable)
                try:
                    data = vd.read(nRec=vd.inquire()[0])

                    # create numpy array from data
                    data = np.array(data).flatten()

                    missing_value = _get_attribute_value(vd, 'missing', None)
                    if missing_value is not None:
                        data = create_masked_array_for_missing_data(data, missing_value)

                    valid_range = _get_attribute_value(vd, "valid_range")
                    if valid_range is not None:
                        # Assume it's the right data type already
                        data = np.ma.masked_outside(data, *valid_range)

                    # TODO This probably won't work....
                    factor = _get_attribute_value(vd, "factor", 1)
                    offset = _get_attribute_value(vd, "offset", 0)
                    data = self._apply_scaling_factor_CLOUDSAT(data, factor, offset)
                finally:
                    vd.detach()
            finally:
                vs.end()
        finally:
            datafile.close()

        return data
コード例 #10
0
    def _get_cloudsat_vds_data(self, vds):
        """
        Load one CloudSat Vdata variable with masking and scaling applied.

        Opens ``vds.filename`` with ``HDF``, attaches the Vdata ``vds.variable``
        and reads ``vd.inquire()[0]`` records into a flattened numpy array.
        Masking uses the ``missing`` and ``valid_range`` attributes when they
        exist; ``factor`` (default 1) and ``offset`` (default 0) are forwarded
        to ``self._apply_scaling_factor_CLOUDSAT``.

        Every handle that was acquired (Vdata, V-interface, file) is released
        on the way out, including when an exception is raised part-way through.

        :param vds: Object exposing ``filename`` and ``variable`` attributes
        :return: The value returned by ``self._apply_scaling_factor_CLOUDSAT``
        :raises IOError: If the HDF file cannot be opened
        """
        from cis.data_io.hdf_vd import _get_attribute_value, HDF, HDF4Error
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np

        # get file and variable reference from tuple
        filename = vds.filename
        variable = vds.variable

        try:
            datafile = HDF(filename)
        except HDF4Error as e:
            raise IOError(e)

        # Each acquisition is paired with a finally clause so that cleanup runs
        # in reverse order (detach, end, close) even if a later step fails.
        try:
            vs = datafile.vstart()
            try:
                vd = vs.attach(variable)
                try:
                    # create numpy array from the raw records
                    records = vd.read(nRec=vd.inquire()[0])
                    data = np.array(records).flatten()

                    missing_value = _get_attribute_value(vd, 'missing', None)
                    if missing_value is not None:
                        data = create_masked_array_for_missing_data(data, missing_value)

                    valid_range = _get_attribute_value(vd, "valid_range")
                    if valid_range is not None:
                        # Assume it's the right data type already
                        data = np.ma.masked_outside(data, *valid_range)

                    # TODO This probably won't work....
                    factor = _get_attribute_value(vd, "factor", 1)
                    offset = _get_attribute_value(vd, "offset", 0)
                    data = self._apply_scaling_factor_CLOUDSAT(data, factor, offset)
                finally:
                    vd.detach()
            finally:
                vs.end()
        finally:
            datafile.close()

        return data
コード例 #11
0
    def _get_cloudsat_sds_data(self, sds):
        """
        Read a CloudSat SD dataset, mask invalid values and apply CloudSat scaling.

        Three masking steps are applied in order, each only when the relevant
        attributes exist: the ``_FillValue``, the ``missing``/``missop`` pair
        (a value plus a comparison operator) and the ``valid_range``.

        The offset and scale factor are not SD attributes: they are read from
        companion Vdata variables named ``<variable>_add_offset`` and
        ``<variable>_scale_factor`` in the same file. When one cannot be read
        (``HDF4Error``) a warning is logged and 0 / 1 is used instead.

        :param sds: The specific sds instance to read
        :return: The value returned by ``self._apply_scaling_factor_CLOUDSAT``.
        """
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np
        from cis.data_io.hdf_vd import VDS, get_data
        from pyhdf.error import HDF4Error
        data = sds.get()
        attributes = sds.attributes()

        # First deal with the Fill value
        fill_value = attributes.get('_FillValue', None)

        if fill_value is not None:
            data = create_masked_array_for_missing_data(data, fill_value)

        # TODO: This needs some explicit integration and unit tests
        # Then deal with missing values
        missop_fn = {'<': np.ma.masked_less,
                     '<=': np.ma.masked_less_equal,
                     '==': np.ma.masked_equal,
                     '=>': np.ma.masked_greater_equal,
                     '>': np.ma.masked_greater,
                     # TODO Note that this is wrong but seems to be what is meant, for Cloud_Effective_Radius at
                     # least...
                     'ge': np.ma.masked_equal,
                     'eq': np.ma.masked_equal}

        missing = attributes.get('missing', None)
        missop = attributes.get('missop', None)
        if missing is not None and missop is not None:
            try:
                logging.debug("Masking all values v {} {}".format(missop, missing))
                data = missop_fn[missop](data, missing)
            except KeyError:
                logging.warning("Unable to identify missop {}, unable to "
                                "mask missing values for {}.".format(missop, sds.info()[0]))

        # Now handle valid range mask
        valid_range = attributes.get('valid_range', None)
        if valid_range is not None:
            # Assume it's the right data type already
            logging.debug("Masking all values {} > v > {}.".format(*valid_range))
            data = np.ma.masked_outside(data, *valid_range)

        # Offsets and scaling - these come from Vdata variables with the appropriate suffixes.
        # Failures are reported through logging (not print) so they follow the
        # same channel and configuration as the other messages in this method.
        try:
            offset = get_data(VDS(sds._filename, sds._variable + "_add_offset"))[0]
        except HDF4Error:
            logging.warning("Couldn't find offset variable %s_add_offset", sds._variable)
            offset = 0
        try:
            scale_factor = get_data(VDS(sds._filename, sds._variable + "_scale_factor"))[0]
        except HDF4Error:
            logging.warning("Couldn't find scale factor variable %s_scale_factor", sds._variable)
            scale_factor = 1

        data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

        return data
コード例 #12
0
    def _get_cloudsat_sds_data(self, sds):
        """
        Load one CloudSat SD dataset with masking and CloudSat scaling applied.

        Masking is performed in three optional steps, in this order: the
        ``_FillValue`` attribute, the ``missing`` value combined with the
        ``missop`` comparison operator, and the ``valid_range`` attribute.

        Offset and scale factor are looked up in companion Vdata variables of
        the same file, named ``<variable>_add_offset`` and
        ``<variable>_scale_factor``. If either lookup raises ``HDF4Error`` a
        warning is logged and the neutral value (0 or 1) is used.

        :param sds: The specific sds instance to read
        :return: The value returned by ``self._apply_scaling_factor_CLOUDSAT``.
        """
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np
        from cis.data_io.hdf_vd import VDS, get_data
        from pyhdf.error import HDF4Error
        data = sds.get()
        attributes = sds.attributes()

        # First deal with the Fill value
        fill_value = attributes.get('_FillValue', None)

        if fill_value is not None:
            data = create_masked_array_for_missing_data(data, fill_value)

        # TODO: This needs some explicit integration and unit tests
        # Then deal with missing values
        missop_fn = {
            '<': np.ma.masked_less,
            '<=': np.ma.masked_less_equal,
            '==': np.ma.masked_equal,
            '=>': np.ma.masked_greater_equal,
            '>': np.ma.masked_greater,
            # TODO Note that this is wrong but seems to be what is meant, for Cloud_Effective_Radius at
            # least...
            'ge': np.ma.masked_equal,
            'eq': np.ma.masked_equal
        }

        missing = attributes.get('missing', None)
        missop = attributes.get('missop', None)
        if missing is not None and missop is not None:
            try:
                logging.debug("Masking all values v {} {}".format(
                    missop, missing))
                data = missop_fn[missop](data, missing)
            except KeyError:
                logging.warning("Unable to identify missop {}, unable to "
                                "mask missing values for {}.".format(
                                    missop,
                                    sds.info()[0]))

        # Now handle valid range mask
        valid_range = attributes.get('valid_range', None)
        if valid_range is not None:
            # Assume it's the right data type already
            logging.debug(
                "Masking all values {} > v > {}.".format(*valid_range))
            data = np.ma.masked_outside(data, *valid_range)

        # Offsets and scaling - these come from Vdata variables with the appropriate suffixes.
        # Lookup failures go through logging.warning rather than print, so they
        # are handled like every other diagnostic emitted by this method.
        offset_name = sds._variable + "_add_offset"
        try:
            offset = get_data(VDS(sds._filename, offset_name))[0]
        except HDF4Error:
            logging.warning("Couldn't find offset variable %s", offset_name)
            offset = 0

        scale_name = sds._variable + "_scale_factor"
        try:
            scale_factor = get_data(VDS(sds._filename, scale_name))[0]
        except HDF4Error:
            logging.warning("Couldn't find scale factor variable %s", scale_name)
            scale_factor = 1

        data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

        return data