Example #1
    def _create_cube(self, filenames, variable):
        import numpy as np
        from cis.data_io.hdf import _read_hdf4
        from cis.data_io import hdf_vd
        from iris.cube import Cube, CubeList
        from iris.coords import DimCoord, AuxCoord
        from cis.time_util import calculate_mid_time, cis_standard_time_unit
        from cis.data_io.hdf_sd import get_metadata
        from cf_units import Unit

        variables = ['XDim:GlobalGrid', 'YDim:GlobalGrid', variable]
        logging.info("Listing coordinates: " + str(variables))

        cube_list = CubeList()
        # Read each file individually, let Iris do the merging at the end.
        for f in filenames:
            sdata, vdata = _read_hdf4(f, variables)

            lat_points = np.linspace(-90., 90., hdf_vd.get_data(vdata['YDim:GlobalGrid']))
            lon_points = np.linspace(-180., 180., hdf_vd.get_data(vdata['XDim:GlobalGrid']))

            lat_coord = DimCoord(lat_points, standard_name='latitude', units='degrees')
            lon_coord = DimCoord(lon_points, standard_name='longitude', units='degrees')

            # create time coordinate using the midpoint of the time delta between the start date and the end date
            start_datetime = self._get_start_date(f)
            end_datetime = self._get_end_date(f)
            mid_datetime = calculate_mid_time(start_datetime, end_datetime)
            logging.debug("Using {} as datetime for file {}".format(mid_datetime, f))
            time_coord = AuxCoord(mid_datetime, standard_name='time', units=cis_standard_time_unit,
                                  bounds=[start_datetime, end_datetime])

            var = sdata[variable]
            metadata = get_metadata(var)

            try:
                units = Unit(metadata.units)
            except ValueError:
                logging.warning("Unable to parse units '{}' in {} for {}.".format(metadata.units, f, variable))
                units = None

            cube = Cube(_get_MODIS_SDS_data(sdata[variable]),
                        dim_coords_and_dims=[(lon_coord, 1), (lat_coord, 0)],
                        aux_coords_and_dims=[(time_coord, None)],
                        var_name=metadata._name, long_name=metadata.long_name, units=units)

            cube_list.append(cube)

        # Merge the cube list across the scalar time coordinates before returning a single cube.
        return cube_list.merge_cube()
Example #2
    def _generate_time_array(self, vdata):
        import cis.data_io.hdf_vd as hdf_vd
        import datetime as dt
        from cis.time_util import convert_sec_since_to_std_time

        Cloudsat_start_time = dt.datetime(1993, 1, 1, 0, 0, 0)

        arrays = []
        for i, j in zip(vdata['Profile_time'], vdata['TAI_start']):
            time = hdf_vd.get_data(i)
            start = hdf_vd.get_data(j)
            time += start
            # Do the conversion to standard time here before we expand the time array...
            time = convert_sec_since_to_std_time(time, Cloudsat_start_time)
            arrays.append(time)
        return utils.concatenate(arrays)
Example #3
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
        except ImportError:
            raise ImportError(
                "HDF support was not installed, please reinstall with pyhdf to read HDF files."
            )

        variables = set([])

        # Determine the valid shape for variables
        sd = SD(filenames[0])
        datasets = sd.datasets()
        len_x = datasets['Latitude'][1][0]  # Assumes that latitude shape == longitude shape (it should)
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
        len_y = alt_data.shape[0]
        valid_shape = (len_x, len_y)

        for filename in filenames:
            sd = SD(filename)
            for var_name, var_info in sd.datasets().items():
                if var_info[1] == valid_shape:
                    variables.add(var_name)

        return variables
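
A minimal usage sketch for the method above. This is hedged: the import path, class name and file name are assumptions for illustration only; in CIS this method belongs to a CALIOP-style data product class.

# Hypothetical usage: list the variables in a CALIPSO L1 HDF file whose shape matches the
# (profiles, altitudes) grid. The class name and file name below are illustrative assumptions.
from cis.data_io.products.caliop import Caliop_L1  # assumed import path

product = Caliop_L1()
names = product.get_variable_names(["CAL_LID_L1-ValStage1-V3-30.hdf"])
print(sorted(names))  # only SD variables matching the valid (profiles, altitudes) shape are returned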
Example #4
    def _generate_time_array(self, vdata):
        import cis.data_io.hdf_vd as hdf_vd
        import datetime as dt
        from cis.time_util import convert_sec_since_to_std_time

        Cloudsat_start_time = dt.datetime(1993, 1, 1, 0, 0, 0)

        arrays = []
        for i, j in zip(vdata['Profile_time'], vdata['TAI_start']):
            time = hdf_vd.get_data(i)
            start = hdf_vd.get_data(j)
            time += start
            # Do the conversion to standard time here before we expand the time array...
            time = convert_sec_since_to_std_time(time, Cloudsat_start_time)
            arrays.append(time)
        return utils.concatenate(arrays)
Example #5
File: hdf.py  Project: gitter-badger/cis-1
def read_data(data_dict, data_type, missing_values=None):
    if data_type == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i, missing_values=missing_values) for i in data_dict])
    elif data_type == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i, missing_values=missing_values) for i in data_dict])
    else:
        raise ValueError("Invalid data-type: %s, HDF variables must be VD or SD only" % data_type)
    return out
Example #6
File: hdf.py  Project: cedadev/cis
def read_data(data_list, read_function):
    """
    Wrapper for calling an HDF reading function for each dataset, and then concatenating the result.

    :param list data_list: A list of data objects to read
    :param callable or str read_function: A function for reading the data, or 'SD' or 'VD' for default reading routines.
    :return: A single numpy array of concatenated data values.
    """
    if callable(read_function):
        out = utils.concatenate([read_function(i) for i in data_list])
    elif read_function == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i) for i in data_list])
    elif read_function == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i) for i in data_list])
    else:
        raise ValueError("Invalid read-function: {}, please supply a callable read "
                         "function, 'VD' or 'SD' only".format(read_function))
    return out
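
A minimal usage sketch of this wrapper. This is hedged: the file names are placeholders, the direct call assumes the same module scope, and hdf_sd.read is the per-file reader used in the other examples on this page.

# Hypothetical usage: gather the SD handles for one variable across several files,
# then let read_data concatenate the values. File names are placeholders.
from cis.data_io import hdf_sd

sd_handles = []
for filename in ["granule_1.hdf", "granule_2.hdf"]:
    sds_dict = hdf_sd.read(filename, ["Latitude"])  # dict of SD dataset objects for this file
    sd_handles.append(sds_dict["Latitude"])

latitudes = read_data(sd_handles, "SD")  # default SD routine -> one concatenated numpy array
# read_data(sd_handles, my_custom_reader) would instead apply a callable to each handle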
Example #7
def read_data(data_list, read_function):
    """
    Wrapper for calling an HDF reading function for each dataset, and then concatenating the result.

    :param list data_list: A list of data objects to read
    :param callable or str read_function: A function for reading the data, or 'SD' or 'VD' for default reading routines.
    :return: A single numpy array of concatenated data values.
    """
    if callable(read_function):
        out = utils.concatenate([read_function(i) for i in data_list])
    elif read_function == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i) for i in data_list])
    elif read_function == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i) for i in data_list])
    else:
        raise ValueError(
            "Invalid read-function: {}, please supply a callable read "
            "function, 'VD' or 'SD' only".format(read_function))
    return out
Example #8
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        variables = set([])

        # Determine the valid shape for variables
        sd = SD(filenames[0])
        datasets = sd.datasets()
        len_x = datasets['Latitude'][1][0]  # Assumes that latitude shape == longitude shape (it should)
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
        len_y = alt_data.shape[0]
        valid_shape = (len_x, len_y)

        for filename in filenames:
            sd = SD(filename)
            for var_name, var_info in sd.datasets().items():
                if var_info[1] == valid_shape:
                    variables.add(var_name)

        return variables
Example #9
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit

        logging.debug("Creating data object for variable " + variable)

        variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        alt_name = "altitude"
        logging.info("Additional coordinates: '" + alt_name + "'")

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)

        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = hdf.read_data(sdata['Latitude'],
                                 self._get_calipso_data)[:, index_offset]
        lat_coord = AuxCoord(lat_data, standard_name='latitude')

        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_coord = AuxCoord(pres_data,
                              standard_name='air_pressure',
                              units='hPa')

        # longitude
        lon = sdata['Longitude']
        lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
        lon_coord = AuxCoord(lon_data, standard_name='longitude')

        # profile time, x
        time = sdata['Profile_Time']
        time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
        time_coord = DimCoord(time_data,
                              long_name='Profile_Time',
                              standard_name='time',
                              units="seconds since 1993-01-01 00:00:00")
        time_coord.convert_units(cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        if variable in MIXED_RESOLUTION_VARIABLES:
            logging.warning(
                "Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                "documentation for more details".format(variable))
            data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
        else:
            data = hdf.read_data(var, self._get_calipso_data)

        cube = Cube(data,
                    long_name=metadata.long_name,
                    units=self.clean_units(metadata.units),
                    dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                    aux_coords_and_dims=[(lat_coord, (0, )),
                                         (lon_coord, (0, )),
                                         (pres_coord, (0, 1))])
        gd = GriddedData.make_from_cube(cube)
        return gd
Example #10
    def _get_cloudsat_sds_data(self, sds):
        """
        Reads raw data from an SD instance. Automatically applies the
        scaling factors and offsets to the data arrays often found in NASA HDF-EOS
        data (e.g. MODIS)

        :param sds: The specific sds instance to read
        :return: A numpy array containing the raw data, with missing data replaced by NaN.
        """
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np
        from cis.data_io.hdf_vd import VDS, get_data
        from pyhdf.error import HDF4Error
        data = sds.get()
        attributes = sds.attributes()

        # First deal with the Fill value
        fill_value = attributes.get('_FillValue', None)

        if fill_value is not None:
            data = create_masked_array_for_missing_data(data, fill_value)

        # TODO: This needs some explicit integration and unit tests
        # Then deal with missing values
        missop_fn = {'<': np.ma.masked_less,
                     '<=': np.ma.masked_less_equal,
                     '==': np.ma.masked_equal,
                     '=>': np.ma.masked_greater_equal,
                     '>': np.ma.masked_greater,
                     # TODO Note that this is wrong but seems to be what is meant, for Cloud_Effective_Radius at
                     # least...
                     'ge': np.ma.masked_equal,
                     'eq': np.ma.masked_equal}

        missing = attributes.get('missing', None)
        missop = attributes.get('missop', None)
        if missing is not None and missop is not None:
            try:
                logging.debug("Masking all values v {} {}".format(missop, missing))
                data = missop_fn[missop](data, missing)
            except KeyError:
                logging.warning("Unable to identify missop {}, unable to "
                                "mask missing values for {}.".format(missop, sds.info()[0]))

        # Now handle valid range mask
        valid_range = attributes.get('valid_range', None)
        if valid_range is not None:
            # Assume it's the right data type already
            logging.debug("Masking all values {} > v > {}.".format(*valid_range))
            data = np.ma.masked_outside(data, *valid_range)

        # Offsets and scaling - these come from Vdata variables with the appropriate suffixes
        try:
            offset = get_data(VDS(sds._filename, sds._variable + "_add_offset"))[0]
        except HDF4Error:
            print("WARNING: Couldn't find offset variable " + sds._variable + "_add_offset")
            offset = 0
        try:
            scale_factor = get_data(VDS(sds._filename, sds._variable + "_scale_factor"))[0]
        except HDF4Error:
            print("WARNING: Couldn't find scale factor variable " + sds._variable + "_scale_factor")
            scale_factor = 1

        data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

        return data
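
For reference, this reader is intended to be passed as the callable argument of the read_data wrapper shown earlier. A hedged sketch of that call (the variable name is illustrative; `sdata` would be built with hdf_sd.read as in the other examples on this page):

# Hypothetical usage within the same product class:
# radar_refl = hdf.read_data(sdata["Radar_Reflectivity"], self._get_cloudsat_sds_data)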
Example #11
def test_that_can_get_data():
    vds_dict = hdf_vd.read(valid_hdf_vd_file, 'DEM_elevation')
    vds = vds_dict['DEM_elevation']
    data = hdf_vd.get_data(vds)
    eq_(37081, len(data))
Example #12
    def _create_cube(self, filenames, variable):
        import numpy as np
        from cis.data_io.hdf import _read_hdf4
        from cis.data_io import hdf_vd
        from iris.cube import Cube, CubeList
        from iris.coords import DimCoord, AuxCoord
        from cis.time_util import calculate_mid_time, cis_standard_time_unit
        from cis.data_io.hdf_sd import get_metadata
        from cf_units import Unit

        variables = ['XDim:GlobalGrid', 'YDim:GlobalGrid', variable]
        logging.info("Listing coordinates: " + str(variables))

        cube_list = CubeList()
        # Read each file individually, let Iris do the merging at the end.
        for f in filenames:
            sdata, vdata = _read_hdf4(f, variables)

            lat_points = np.linspace(-90., 90.,
                                     hdf_vd.get_data(vdata['YDim:GlobalGrid']))
            lon_points = np.linspace(-180., 180.,
                                     hdf_vd.get_data(vdata['XDim:GlobalGrid']))

            lat_coord = DimCoord(lat_points,
                                 standard_name='latitude',
                                 units='degrees')
            lon_coord = DimCoord(lon_points,
                                 standard_name='longitude',
                                 units='degrees')

            # create time coordinate using the midpoint of the time delta between the start date and the end date
            start_datetime = self._get_start_date(f)
            end_datetime = self._get_end_date(f)
            mid_datetime = calculate_mid_time(start_datetime, end_datetime)
            logging.debug("Using {} as datetime for file {}".format(
                mid_datetime, f))
            time_coord = AuxCoord(mid_datetime,
                                  standard_name='time',
                                  units=cis_standard_time_unit,
                                  bounds=[start_datetime, end_datetime])

            var = sdata[variable]
            metadata = get_metadata(var)

            try:
                units = Unit(metadata.units)
            except ValueError:
                logging.warning(
                    "Unable to parse units '{}' in {} for {}.".format(
                        metadata.units, f, variable))
                units = None

            cube = Cube(_get_MODIS_SDS_data(sdata[variable]),
                        dim_coords_and_dims=[(lon_coord, 1), (lat_coord, 0)],
                        aux_coords_and_dims=[(time_coord, None)],
                        var_name=metadata._name,
                        long_name=metadata.long_name,
                        units=units)

            cube_list.append(cube)

        # Merge the cube list across the scalar time coordinates before returning a single cube.
        return cube_list.merge_cube()
Example #13
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit

        logging.debug("Creating data object for variable " + variable)

        variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        alt_name = "altitude"
        logging.info("Additional coordinates: '" + alt_name + "'")

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)

        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
        lat_coord = AuxCoord(lat_data, standard_name='latitude')

        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        # longitude
        lon = sdata['Longitude']
        lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
        lon_coord = AuxCoord(lon_data, standard_name='longitude')

        # profile time, x
        time = sdata['Profile_Time']
        time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
        time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units="seconds since 1993-01-01 00:00:00")
        time_coord.convert_units(cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        if variable in MIXED_RESOLUTION_VARIABLES:
            logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                            "documentation for more details".format(variable))
            data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
        else:
            data = hdf.read_data(var, self._get_calipso_data)

        cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units),
                    dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)], aux_coords_and_dims=[(lat_coord, (0,)),
                                                                                                (lon_coord, (0,)),
                                                                                                (pres_coord, (0, 1))])
        gd = GriddedData.make_from_cube(cube)
        return gd
Example #14
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit
        from datetime import datetime
        from iris.util import new_axis
        import numpy as np

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

        lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))


        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd
Example #15
def test_that_can_get_data():
    vds_dict = hdf_vd.read(escape_colons(valid_hdf_vd_file), 'DEM_elevation')
    vds = vds_dict['DEM_elevation']
    data = hdf_vd.get_data(vds)
    eq_(37081, len(data))
Example #16
    def _get_cloudsat_sds_data(self, sds):
        """
        Reads raw data from an SD instance. Automatically applies the
        scaling factors and offsets to the data arrays often found in NASA HDF-EOS
        data (e.g. MODIS)

        :param sds: The specific sds instance to read
        :return: A numpy array containing the raw data, with missing data replaced by NaN.
        """
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np
        from cis.data_io.hdf_vd import VDS, get_data
        from pyhdf.error import HDF4Error
        data = sds.get()
        attributes = sds.attributes()

        # First deal with the Fill value
        fill_value = attributes.get('_FillValue', None)

        if fill_value is not None:
            data = create_masked_array_for_missing_data(data, fill_value)

        # TODO: This needs some explicit integration and unit tests
        # Then deal with missing values
        missop_fn = {
            '<': np.ma.masked_less,
            '<=': np.ma.masked_less_equal,
            '==': np.ma.masked_equal,
            '=>': np.ma.masked_greater_equal,
            '>': np.ma.masked_greater,
            # TODO Note that this is wrong but seems to be what is meant, for Cloud_Effective_Radius at
            # least...
            'ge': np.ma.masked_equal,
            'eq': np.ma.masked_equal
        }

        missing = attributes.get('missing', None)
        missop = attributes.get('missop', None)
        if missing is not None and missop is not None:
            try:
                logging.debug("Masking all values v {} {}".format(
                    missop, missing))
                data = missop_fn[missop](data, missing)
            except KeyError:
                logging.warning("Unable to identify missop {}, unable to "
                                "mask missing values for {}.".format(
                                    missop,
                                    sds.info()[0]))

        # Now handle valid range mask
        valid_range = attributes.get('valid_range', None)
        if valid_range is not None:
            # Assume it's the right data type already
            logging.debug(
                "Masking all values {} > v > {}.".format(*valid_range))
            data = np.ma.masked_outside(data, *valid_range)

        # Offsets and scaling - these come from Vdata variables with the appropriate suffixes
        try:
            offset = get_data(VDS(sds._filename,
                                  sds._variable + "_add_offset"))[0]
        except HDF4Error:
            print("WARNING: Couldn't find offset variable " + sds._variable +
                  "_add_offset")
            offset = 0
        try:
            scale_factor = get_data(
                VDS(sds._filename, sds._variable + "_scale_factor"))[0]
        except HDF4Error:
            print("WARNING: Couldn't find scale factor variable " +
                  sds._variable + "_scale_factor")
            scale_factor = 1

        data = self._apply_scaling_factor_CLOUDSAT(data, scale_factor, offset)

        return data
Example #17
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit
        from datetime import datetime
        from iris.util import new_axis
        import numpy as np

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data,
                             standard_name='latitude',
                             units='degrees_north')

        lon_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data,
                             standard_name='longitude',
                             units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(
                datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data,
                                  long_name='Profile_Time',
                                  standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(
                hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data,
                                  standard_name='air_pressure',
                                  units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data,
                            long_name=metadata.long_name or variable,
                            units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0),
                                                 (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data,
                            long_name=metadata.long_name or variable,
                            units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0),
                                                 (lon_coord, 1),
                                                 (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...],
                                       (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError(
                    "Unexpected number of dimensions for CALIOP data: {}".
                    format(data.ndim))

        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd
Example #18
    def _create_coord_list(self, filenames, index_offset=0):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        import datetime as dt
        from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

        variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
        logging.info("Listing coordinates: " + str(variables))

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        alt_name = "altitude"
        logging.info("Additional coordinates: '" + alt_name + "'")

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
        alt_data *= 1000.0  # Convert to m
        len_x = alt_data.shape[0]

        lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
        len_y = lat_data.shape[0]

        new_shape = (len_x, len_y)

        # altitude
        alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0)
        alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape)
        alt_coord = Coord(alt_data, alt_metadata)

        # pressure
        if self.include_pressure:
            pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
            pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
            # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
            if str(pres_metadata.units) == "hPA":
                pres_metadata.units = "hPa"
            pres_metadata.shape = new_shape
            pres_coord = Coord(pres_data, pres_metadata, 'P')

        # latitude
        lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1)
        lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
        lat_metadata.shape = new_shape
        lat_coord = Coord(lat_data, lat_metadata, 'Y')

        # longitude
        lon = sdata['Longitude']
        lon_data = hdf.read_data(lon, self._get_calipso_data)
        lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1)
        lon_metadata = hdf.read_metadata(lon, "SD")
        lon_metadata.shape = new_shape
        lon_coord = Coord(lon_data, lon_metadata, 'X')

        # profile time, x
        time = sdata['Profile_Time']
        time_data = hdf.read_data(time, self._get_calipso_data)
        time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
        time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1)
        time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                               units=cis_standard_time_unit), "T")

        # create the object containing all coordinates
        coords = CoordList()
        coords.append(lat_coord)
        coords.append(lon_coord)
        coords.append(time_coord)
        coords.append(alt_coord)
        if self.include_pressure and (pres_data.shape == alt_data.shape):
            # For MODIS L1 this may not be true, in which case the air pressure reading is skipped. If air pressure
            # is required for MODIS L1, some kind of interpolation would be needed, as it is on a different (smaller)
            # grid than the Lidar_Data_Altitudes.
            coords.append(pres_coord)

        return coords