Example #1
    def read_data_list(self, filenames, variables, product=None, aliases=None):
        """
        Read multiple data objects. Files can be either gridded or ungridded but not a mix of both.

        :param filenames: One or more filenames of the files to read
        :type filenames: string or list
        :param variables: One or more variables to read from the files
        :type variables: string or list
        :param str product: Name of data product to use (optional)
        :param aliases: List of variable aliases to put on each variable's
         data object as an alternative means of identifying them. (Optional)
        :return:  A list of the data read out (either a GriddedDataList or UngriddedDataList depending on the
         type of data contained in the files)
        """
        # if filenames or variables are not lists, make them lists of 1 element
        filenames = listify(filenames)
        variables = listify(variables)
        aliases = listify(aliases) if aliases else None

        variables = self._expand_wildcards(variables, filenames, product)

        data_list = None
        for idx, variable in enumerate(variables):
            var_data = self._get_data_func(filenames, variable, product)
            var_data.filenames = filenames
            if aliases:
                try:
                    var_data.alias = aliases[idx]
                except IndexError:
                    raise ValueError("Number of aliases does not match number of variables")
            if data_list is None:
                data_list = GriddedDataList() if var_data.is_gridded else UngriddedDataList()
            data_list.append(var_data)
        assert data_list is not None
        return data_list
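
Every example on this page leans on cis.utils.listify to accept either a single value or a list. As a rough sketch of the behaviour these snippets assume (not necessarily the library's exact implementation):

def listify(item):
    """Return item as a list: lists pass through unchanged, tuples are
    converted, and any other single value is wrapped in a one-element list."""
    if isinstance(item, list):
        return item
    if isinstance(item, tuple):
        return list(item)
    return [item]

print(listify('file.nc'))         # ['file.nc']
print(listify(['a.nc', 'b.nc']))  # ['a.nc', 'b.nc']
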
Example #2
def _tidy_ncdf_data(var, start=None, count=None, stride=None):
    from cis.utils import listify
    from numpy.ma import MaskedArray

    start = [] if start is None else listify(start)
    count = [] if count is None else listify(count)
    stride = [] if stride is None else listify(stride)
    dim_len = var.shape
    ndim = len(dim_len)

    # Assume full read of all omitted dimensions
    while len(start) < ndim:
        start += [0]
    while len(count) < ndim:
        count += [-1]
    while len(stride) < ndim:
        stride += [1]

    # Build one slice per dimension; a negative count means "read to the end
    # of that dimension". Indexing needs a tuple of slices, not a generator.
    sl = tuple(slice(x0, x0 + n * s if n >= 0 else l, s)
               for x0, n, s, l in zip(start, count, stride, dim_len))

    data = var[sl]
    if isinstance(data, MaskedArray):
        return data
    else:
        return MaskedArray(data)
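
The padding above can be checked against a plain numpy array standing in for the netCDF variable; only the first dimension is restricted here, and the omitted one is filled in automatically:

import numpy as np

var = np.arange(24).reshape(4, 6)  # stand-in for a netCDF variable
start, count, stride = [1], [2], [1]

# Pad the omitted trailing dimension exactly as _tidy_ncdf_data does
ndim = var.ndim
start += [0] * (ndim - len(start))
count += [-1] * (ndim - len(count))
stride += [1] * (ndim - len(stride))

sl = tuple(slice(x0, x0 + n * s if n >= 0 else l, s)
           for x0, n, s, l in zip(start, count, stride, var.shape))
print(var[sl])  # rows 1 and 2, all columns
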
Example #3
    def _create_fixed_value_coord(self, coord_axis, values, coord_units,
                                  points_counts, coord_name):
        """
        Create a coordinate with a fixed value
        :param coord_axis: Axis of the coordinate in the coords
        :param coord_name: The name of the coordinate
        :param coord_units: The units for the coordinate
        :param points_counts: Number of points for this coordinate, or list of sizes for multiple files
        :param values: Value of coordinate, or list of values for multiple files
        :return: A Coord with the fixed value(s) repeated over the given point counts
        """
        from cis.data_io.Coord import Coord

        values = listify(values)
        points_counts = listify(points_counts)
        all_points = np.array([])
        # Create separate arrays with values and sizes corresponding to each of the different input files.
        for value, points_count in zip(values, points_counts):
            file_points = np.ma.array(np.zeros(points_count) + float(value))
            all_points = np.append(all_points, file_points)
        metadata = Metadata(name=coord_name,
                            shape=all_points.shape,
                            units=coord_units,
                            range=(min(values), max(values)))
        return Coord(all_points, metadata, coord_axis)
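
The points-building loop can be exercised with numpy alone; for instance, two files with different sizes and values produce one concatenated array:

import numpy as np

values, points_counts = [10.0, 20.0], [3, 2]
all_points = np.array([])
for value, points_count in zip(values, points_counts):
    file_points = np.zeros(points_count) + float(value)
    all_points = np.append(all_points, file_points)
print(all_points)  # [10. 10. 10. 20. 20.]
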
Example #4
    def __init__(self, attributes, variables):
        """
        Initialisation
        :param attributes: dictionary of attributes and their values (or list of dictionaries if multiple files read)
        :param variables: dictionary of variable names and NetCDF Variable objects
        (or list of dictionaries if multiple files read)
        :return: nothing
        """
        self.station = False
        self.station_latitude = None
        self.latitude_variable_name = None
        self.station_longitude = None
        self.longitude_variable_name = None
        self.altitude = None
        self.altitude_variable_name = None
        self.pressure_variable_name = None
        self.time_stamp_info = None
        self.time_dimensions = None

        self._attributes = [{k.lower(): v for k, v in attrs.items()} for attrs in listify(attributes)]
        if len(variables) == 0:
            raise InvalidVariableError("No variables in the file so the type of data is unknown")
        self._variables = variables[0].keys()
        self._variable_dimensions = [{name: var.dimensions for name, var in vars.items()}
                                     for vars in listify(variables)]
        self._check_has_variables_and_attributes()

        # Carry out these checks using the attributes from the first file as a 'master'
        if self.TIME_COORDINATE_NAME.lower() in self._attributes[0]:
            self.time_variable_name = self._get_coordinate_variable_name(self.TIME_COORDINATE_NAME, "time")

            if self.LATITUDE_COORDINATE_NAME.lower() in self._attributes[0]:
                self._lat_lon_var_specified_setup()
            elif self.STATION_LATITUDE_NAME.lower() in self._attributes[0]:
                self._stationary_setup()
            else:
                raise InvalidVariableError("No attributes indicating latitude, expecting '{}' or '{}'"
                                           .format(self.STATION_LATITUDE_NAME, self.LONGITUDE_COORDINATE_NAME))
        elif self.BEST_COORDINATES_NAME.lower() in self._attributes[0]:
            self._best_coordinates_setup()
        else:
            raise InvalidVariableError(
                "No attributes indicating time variable name, expecting either '{}' or 'Coordinates'"
                .format(self.TIME_COORDINATE_NAME))

        if self.CORRECTED_PRESSURE_VAR_NAME in self._variables:
            self.pressure_variable_name = self.CORRECTED_PRESSURE_VAR_NAME
        elif self.PRESSURE_VAR_NAME in self._variables:
            self.pressure_variable_name = self.PRESSURE_VAR_NAME
        else:
            self.pressure_variable_name = None

        if self.TIME_STAMP_INFO_NAME.lower() in self._attributes[0]:
            # Not all files will have the same timestamp -> Retrieve a list of timestamps for each file.
            self.time_stamp_info = [attrs[self.TIME_STAMP_INFO_NAME.lower()] for attrs in self._attributes]

        self.time_dimensions = self._variable_dimensions[0][self.time_variable_name]
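
The attribute normalisation at the top of __init__ is worth isolating: thanks to listify, a single attribute dictionary and a list of per-file dictionaries are handled by the same comprehension. A self-contained sketch of that step:

def normalise_attributes(attributes):
    # Accept one dict or a list of dicts; lowercase the keys in each
    attrs_list = attributes if isinstance(attributes, list) else [attributes]
    return [{k.lower(): v for k, v in attrs.items()} for attrs in attrs_list]

print(normalise_attributes({'Coordinates': 'lat lon time'}))
print(normalise_attributes([{'Time_Coordinate': 't'}, {'Time_Coordinate': 'time'}]))
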
Example #5
File: plot.py Project: cpaulik/cis
 def _remove_length_one_dimensions(self, packed_data):
     from iris.util import squeeze
     from cis.data_io.gridded_data import GriddedData
     from cis.utils import listify
     packed_data = listify(packed_data)
     new_data_list = []
     for data in packed_data:
         if data.is_gridded:
             new_data_list.append(GriddedData.make_from_cube(squeeze(data)))
         else:
             new_data_list.append(data)
     return new_data_list
Example #6
def _get_MODIS_SDS_data(sds, start=None, count=None, stride=None):
    """
    Reads raw data from an SD instance.

    :param sds: The specific sds instance to read
    :param start: List of indices to start reading from each dimension
    :param count: List of number of data to read from each dimension
    :param stride: List of strides to read from each dimension
    :return: A numpy array containing the raw data, with missing or invalid data masked.
    """
    from cis.utils import create_masked_array_for_missing_data, listify
    from cis.data_io.products.MODIS import _apply_scaling_factor_MODIS
    from numpy.ma import masked_outside

    start = [] if start is None else listify(start)
    count = [] if count is None else listify(count)
    stride = [] if stride is None else listify(stride)
    _, ndim, dim_len, _, _ = sds.info()

    # Assume full read of all omitted dimensions
    while len(start) < ndim:
        start += [0]
    while len(count) < ndim:
        count += [-1]
    while len(stride) < ndim:
        stride += [1]

    # Allow lazy notation for "read all"
    count = [
        n if n >= 0 else l - x0 for x0, n, l in zip(start, count, dim_len)
    ]

    data = sds.get(start, count, stride).squeeze()
    attributes = sds.attributes()

    # Apply Fill Value
    missing_value = attributes.get('_FillValue', None)
    if missing_value is not None:
        data = create_masked_array_for_missing_data(data, missing_value)

    # Check for valid_range
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        data = masked_outside(data, *valid_range)

    # Offsets and scaling.
    add_offset = attributes.get('add_offset', 0.0)
    scale_factor = attributes.get('scale_factor', 1.0)
    data = _apply_scaling_factor_MODIS(data, scale_factor, add_offset)

    return data
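
The 'read all' normalisation is simple to verify in isolation: a count of -1 becomes everything from the start index to the end of that dimension:

start = [2, 0]
count = [-1, 3]
dim_len = [10, 5]
count = [n if n >= 0 else l - x0 for x0, n, l in zip(start, count, dim_len)]
print(count)  # [8, 3]
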
Example #7
    def _create_time_coord(self, timestamp, time_variable_name, data_variables, coord_axis='T', standard_name='time'):
        """
        Create a time coordinate, taking into account the fact that each file may have a different timestamp.
        :param timestamp: Timestamp, or list of timestamps (one per file)
        :param time_variable_name: Name of the time variable
        :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
        :param coord_axis: Axis, default 'T'
        :param standard_name: Coord standard name, default 'time'
        :return: Coordinate
        """
        from cis.data_io.Coord import Coord
        from six.moves import zip_longest

        timestamps = listify(timestamp)
        time_variables = data_variables[time_variable_name]
        time_coords = []
        # Create a coordinate for each separate file to account for differing timestamps
        for file_time_var, timestamp in zip_longest(time_variables, timestamps):
            metadata = get_metadata(file_time_var)
            metadata.standard_name = standard_name
            coord = Coord(file_time_var, metadata, coord_axis)
            coord.convert_to_std_time(timestamp)
            time_coords.append(coord)

        return Coord.from_many_coordinates(time_coords)
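
zip_longest matters here because there may be fewer timestamps than time variables; missing entries are padded with None rather than dropping files, as a plain zip would. With the standard-library equivalent of six.moves.zip_longest:

from itertools import zip_longest

time_variables = ['t_file1', 't_file2', 't_file3']  # one per input file
timestamps = ['2008-01-01']

for var, ts in zip_longest(time_variables, timestamps):
    print(var, ts)
# t_file1 2008-01-01
# t_file2 None
# t_file3 None
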
Example #8
 def __init__(self, filenames):
     from cis.utils import listify
     from glob import glob
     if isinstance(filenames, str):
         self._filenames = glob(filenames)
     else:
         self._filenames = listify(filenames)
Example #9
    def _create_time_coord(self, timestamp, time_variable_name, data_variables, coord_axis='T', standard_name='time'):
        """
        Create a time coordinate, taking into account the fact that each file may have a different timestamp.
        :param timestamp: Timestamp, or list of timestamps (one per file)
        :param time_variable_name: Name of the time variable
        :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
        :param coord_axis: Axis, default 'T'
        :param standard_name: Coord standard name, default 'time'
        :return: Coordinate
        """
        from iris.coords import AuxCoord
        from six.moves import zip_longest
        from cis.time_util import convert_time_using_time_stamp_info_to_std_time as convert, cis_standard_time_unit
        from cis.utils import concatenate

        timestamps = listify(timestamp)
        time_variables = data_variables[time_variable_name]
        time_data = []
        # Create a coordinate for each separate file to account for differing timestamps
        for file_time_var, timestamp in zip_longest(time_variables, timestamps):
            metadata = get_metadata(file_time_var)
            if timestamp is not None:
                time_d = convert(file_time_var[:], metadata.units, timestamp)
            else:
                time_d = metadata.units.convert(file_time_var[:], cis_standard_time_unit)
            time_data.append(time_d)

        return AuxCoord(concatenate(time_data), standard_name=standard_name, units=cis_standard_time_unit)
Example #10
def read_many_files(filenames, usr_variables, dim=None):
    """
    Reads one or more Variables from many NetCDF files. This method uses the netCDF4 MFDataset class and so is NOT
    suitable for NetCDF4 datasets (only 'CLASSIC' netcdf).

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files.
      The names must appear exactly as in the NetCDF file.
    :param dim: The name of the dimension on which to aggregate the data. None is the default,
      which tries to aggregate over the unlimited dimension.
    :return: A dictionary of variable instances constructed from all of the input files, keyed by variable name
    """
    from netCDF4 import MFDataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = MFDataset(filenames, aggdim=dim)
    except RuntimeError as e:
        raise IOError(e)

    data = {}
    for variable in usr_variables:
        # Get data.
        try:
            data[variable] = datafile.variables[variable]
        except KeyError:
            raise InvalidVariableError('Variable {} not found in file {}.'.format(variable, filenames))

    return data
Example #11
def read(filename, usr_variables):
    """
    Reads one or more Variables from a NetCDF file

    :param filename: The name (with path) of the NetCDF file to read.
    :param usr_variables: A variable (dataset) name, or list of names, to read from the file. Each name must appear
      exactly as in the NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the
      form ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A dictionary of Variable instances, keyed by the fully qualified variable name
    """
    from netCDF4 import Dataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = Dataset(filename)
    except RuntimeError as e:
        raise IOError(str(e))

    data = {}
    for full_variable in usr_variables:
        # Split the fully qualified variable (group/variable) into group and variable
        parts = full_variable.split("/")
        groups = parts[:-1]
        variable = parts[-1]
        current_group = datafile
        for group in groups:
            current_group = current_group.groups[group]
        try:
            data[full_variable] = current_group.variables[variable]
        except KeyError:
            raise InvalidVariableError(full_variable +
                                       ' could not be found in ' + filename)

    return data
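
The group traversal is plain string handling: the fully qualified name is split on '/', and everything but the last element names a nested group. With a nested dictionary standing in for a netCDF4 Dataset's groups and variables:

full_variable = 'AVHRR/Ch4CentralWavenumber'
parts = full_variable.split('/')
groups, variable = parts[:-1], parts[-1]

datafile = {'AVHRR': {'Ch4CentralWavenumber': '<variable object>'}}  # stand-in
current_group = datafile
for group in groups:
    current_group = current_group[group]
print(current_group[variable])  # <variable object>
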
Example #12
def read_many_files_individually(filenames, usr_variables):
    """
    Read multiple Variables from many NetCDF files manually - i.e. not with MFDataset as this doesn't always work,
    in particular for NetCDF4 files.

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files. The names must appear exactly as
      in the NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the form
      ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A dictionary of lists of variable instances constructed from all of the input files with the fully
      qualified variable name as the key
    """
    from cis.utils import add_element_to_list_in_dict

    usr_variables = listify(usr_variables)

    var_data = {}

    for filename in filenames:
        var_dict = read(filename, usr_variables)
        for var in list(var_dict.keys()):
            add_element_to_list_in_dict(var_data, var, var_dict[var])

    return var_data
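
add_element_to_list_in_dict is not shown on this page, but the accumulation it performs is the standard setdefault pattern: each variable name collects one handle per file, giving the 'dictionary of lists' the docstring promises:

var_data = {}
for filename in ['a.nc', 'b.nc']:
    var_dict = {'temperature': filename + ':handle'}  # stand-in for read()
    for var, handle in var_dict.items():
        var_data.setdefault(var, []).append(handle)
print(var_data)  # {'temperature': ['a.nc:handle', 'b.nc:handle']}
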
Example #13
def read_many_files(filenames, usr_variables, dim=None):
    """
    Reads one or more Variables from many NetCDF files. This method uses the netCDF4 MFDataset class and so is NOT
    suitable for NetCDF4 datasets (only 'CLASSIC' netcdf).

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files.
      The names must appear exactly as in the NetCDF file.
    :param dim: The name of the dimension on which to aggregate the data. None is the default,
      which tries to aggregate over the unlimited dimension.
    :return: A dictionary of variable instances constructed from all of the input files, keyed by variable name
    """
    from netCDF4 import MFDataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = MFDataset(filenames, aggdim=dim)
    except RuntimeError as e:
        raise IOError(e)

    data = {}
    for variable in usr_variables:
        # Get data.
        try:
            data[variable] = datafile.variables[variable]
        except KeyError:
            raise InvalidVariableError(
                'Variable {} not found in file {}.'.format(
                    variable, filenames))

    return data
Example #14
def read(filename, usr_variables):
    """
    Reads one or more Variables from a NetCDF file

    :param filename: The name (with path) of the NetCDF file to read.
    :param usr_variables: A variable (dataset) name, or list of names, to read from the file. Each name must appear
      exactly as in the NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the
      form ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A dictionary of Variable instances, keyed by the fully qualified variable name
    """
    from netCDF4 import Dataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = Dataset(filename)
    except RuntimeError as e:
        raise IOError(str(e))

    data = {}
    for full_variable in usr_variables:
        # Split the fully qualified variable (group/variable) into group and variable
        parts = full_variable.split("/")
        groups = parts[:-1]
        variable = parts[-1]
        current_group = datafile
        for group in groups:
            current_group = current_group.groups[group]
        try:
            data[full_variable] = current_group.variables[variable]
        except KeyError:
            raise InvalidVariableError(full_variable + ' could not be found in ' + filename)

    return data
Example #15
    def __init__(self, data, metadata, coords):
        from cis.data_io.Coord import Coord, CoordList
        from cis.utils import listify

        def getmask(arr):
            mask = np.ma.getmaskarray(arr)
            try:
                mask |= np.isnan(arr)
            except ValueError:
                pass
            return mask

        data = listify(data)
        metadata = listify(metadata)

        if isinstance(coords, list):
            self._coords = CoordList(coords)
        elif isinstance(coords, CoordList):
            self._coords = coords
        elif isinstance(coords, Coord):
            self._coords = CoordList([coords])
        else:
            raise ValueError("Invalid Coords type")

        # Throw out points where any coordinate is masked
        combined_mask = np.zeros(data[0].shape, dtype=bool)
        for coord in self._coords:
            combined_mask |= getmask(coord.data)
            for bound in np.moveaxis(coord.bounds, -1, 0):
                combined_mask |= getmask(bound)
            coord.update_shape()
            coord.update_range()

        if combined_mask.any():
            keep = np.logical_not(combined_mask)
            data = [variable[keep] for variable in data]
            for coord in self._coords:
                coord.data = coord.data[keep]
                new_bounds = np.array([
                    bound[keep]
                    for bound in np.moveaxis(coord.bounds, -1, 0)
                ])
                coord.bounds = np.moveaxis(new_bounds, 0, -1)
                coord.update_shape()
                coord.update_range()

        super(UngriddedCube, self).__init__(zip(data, metadata))
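
The getmask helper merges an array's explicit mask with any NaNs, and the per-coordinate masks are then OR-ed together, so a point survives only if every coordinate is valid. A miniature version of the same idea, using np.ma.getdata to keep the result a plain boolean array:

import numpy as np

def getmask(arr):
    # Explicit mask OR NaNs in the underlying data
    mask = np.ma.getmaskarray(arr).copy()
    data = np.ma.getdata(arr)
    if np.issubdtype(data.dtype, np.floating):
        mask |= np.isnan(data)
    return mask

lat = np.ma.masked_values([0.0, 1.0, -999.0], -999.0)
lon = np.array([0.0, np.nan, 2.0])
combined_mask = getmask(lat) | getmask(lon)
print(combined_mask)  # [False  True  True]
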
Example #16
    def __init__(self, attributes, variables):
        """
        Initialisation
        :param attributes: dictionary of attributes and their values (or list of dictionaries if multiple files read)
        :param variables: dictionary of variable names and NetCDF Variable objects
        (or list of dictionaries if multiple files read)
        :return: nothing
        """
        self.station = False
        self.station_latitude = None
        self.latitude_variable_name = None
        self.station_longitude = None
        self.longitude_variable_name = None
        self.altitude = None
        self.altitude_variable_name = None
        self.pressure_variable_name = None
        self.time_stamp_info = None
        self.time_dimensions = None

        self._attributes = [{k.lower(): v for k, v in list(attrs.items())} for attrs in listify(attributes)]
        if len(variables) == 0:
            raise InvalidVariableError("No variables in the file so the type of data is unknown")
        self._variables = list(variables[0].keys())
        self._variable_dimensions = [{name: var.dimensions for name, var in list(vars.items())}
                                     for vars in listify(variables)]
        self._check_has_variables_and_attributes()

        # Carry out these checks using the attributes from the first file as a 'master'
        if self.TIME_COORDINATE_NAME.lower() in self._attributes[0]:
            self.time_variable_name = self._get_coordinate_variable_name(self.TIME_COORDINATE_NAME, "time")

            if self.LATITUDE_COORDINATE_NAME.lower() in self._attributes[0]:
                self._lat_lon_var_specified_setup()
            elif self.STATION_LATITUDE_NAME.lower() in self._attributes[0]:
                self._stationary_setup()
            else:
                raise InvalidVariableError("No attributes indicating latitude, expecting '{}' or '{}'"
                                           .format(self.STATION_LATITUDE_NAME, self.LONGITUDE_COORDINATE_NAME))
        elif self.BEST_COORDINATES_NAME.lower() in self._attributes[0]:
            self._best_coordinates_setup()
        else:
            raise InvalidVariableError(
                "No attributes indicating time variable name, expecting either '{}' or 'Coordinates'"
                .format(self.TIME_COORDINATE_NAME))

        if self.CORRECTED_PRESSURE_VAR_NAME in self._variables:
            self.pressure_variable_name = self.CORRECTED_PRESSURE_VAR_NAME
        elif self.PRESSURE_VAR_NAME in self._variables:
            self.pressure_variable_name = self.PRESSURE_VAR_NAME
        else:
            self.pressure_variable_name = None

        if self.TIME_STAMP_INFO_NAME.lower() in self._attributes[0]:
            # Not all files will have the same timestamp -> Retrieve a list of timestamps for each file.
            self.time_stamp_info = [attrs[self.TIME_STAMP_INFO_NAME.lower()] for attrs in self._attributes]

        self.time_dimensions = self._variable_dimensions[0][self.time_variable_name]
Example #17
    def read_coordinates(self, filenames, product=None):
        """
        Read the coordinates from one or more files
        :param filenames: One or more filenames of the files to read
        :param product: Name of the data product to use (optional)
        :return: A CoordList object
        """

        # if filenames is not a list, make it a list of 1 element
        filenames = listify(filenames)

        return self._get_coords_func(filenames, product)
Example #18
    def _create_fixed_value_coord(self, coord_axis, values, coord_units, points_counts, coord_name):
        """
        Create a coordinate with a fixed value
        :param coord_axis: Axis of the coordinate in the coords
        :param coord_name: The name of the coordinate
        :param coord_units: The units for the coordinate
        :param points_counts: Number of points for this coordinate, or list of sizes for multiple files
        :param values: Value of coordinate, or list of values for multiple files
        :return: A Coord with the fixed value(s) repeated over the given point counts
        """
        from cis.data_io.Coord import Coord

        values = listify(values)
        points_counts = listify(points_counts)
        all_points = np.array([])
        # Create separate arrays with values and sizes corresponding to each of the different input files.
        for value, points_count in zip(values, points_counts):
            file_points = np.ma.array(np.zeros(points_count) + float(value))
            all_points = np.append(all_points, file_points)
        metadata = Metadata(name=coord_name, shape=all_points.shape, units=coord_units,
                            range=(min(values), max(values)))
        return Coord(all_points, metadata, coord_axis)
Example #19
def ncdf_read(filenames, variable, start=None, count=None, stride=None):
    """Returns variable, concatenated over a sequence of files."""
    from cis.data_io.netcdf import read, get_metadata
    from cis.utils import concatenate, listify

    data = []
    for f in listify(filenames):
        sdata = read(f, variable)
        var = sdata[variable]
        data.append(_tidy_ncdf_data(var, start, count, stride))

    # Metadata is taken from the variable in the last file read
    metadata = get_metadata(var)

    return concatenate(data), metadata
Example #20
    def _create_fixed_value_coord(self, coord_axis, values, coord_units, points_counts, coord_name):
        """
        Create a coordinate with a fixed value
        :param coord_axis: Axis of the coordinate in the coords
        :param coord_name: The name of the coordinate
        :param coord_units: The units for the coordinate
        :param points_counts: Number of points for this coordinate, or list of sizes for multiple files
        :param values: Value of coordinate, or list of values for multiple files
        :return: An AuxCoord holding the fixed value(s)
        """
        from iris.coords import AuxCoord

        all_points = np.array(listify(values))

        return AuxCoord(all_points, units=coord_units, standard_name=coord_name)
Example #21
    def _create_fixed_value_coord(self, coord_axis, values, coord_units,
                                  points_counts, coord_name):
        """
        Create a coordinate with a fixed value
        :param coord_axis: Axis of the coordinate in the coords
        :param coord_name: The name of the coordinate
        :param coord_units: The units for the coordinate
        :param points_counts: Number of points for this coordinate, or list of sizes for multiple files
        :param values: Value of coordinate, or list of values for multiple files
        :return: An AuxCoord holding the fixed value(s)
        """
        from iris.coords import AuxCoord

        all_points = np.array(listify(values))

        return AuxCoord(all_points,
                        units=coord_units,
                        standard_name=coord_name)
Example #22
def __read_hdf4(filename, variables):
    """
        A wrapper method for reading raw data from hdf4 files. This returns a dictionary of io handles
         for each of the VD and SD data types.

        :param filename:     A name of a file to read
        :param variables:    List of variables to read from the files

        :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine will at
    # some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variable
        # AND this can cause a crash in some version/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are removed
        # from it, this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable + " in file: " + filename)

    return sds_dict, vds_dict
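
The copy-before-remove step is the important detail in this function: removing SD names from the caller's own list would corrupt the variable list for the second and subsequent files of a multi-file read. The behaviour the comment describes, in miniature:

variables = ['sd_var', 'vd_var']
sds_dict = {'sd_var': object()}  # variables already found as SD data

vdvariables = list(variables)  # copy, so 'variables' survives intact
for sds_dict_key in sds_dict:
    vdvariables.remove(sds_dict_key)

print(vdvariables)  # ['vd_var'] -- still to be looked up as VD data
print(variables)    # ['sd_var', 'vd_var'] -- unchanged for the next file
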
Example #23
def _read_hdf4(filename, variables):
    """
        A wrapper method for reading raw data from hdf4 files. This returns a dictionary of io handles
         for each of the VD and SD data types.

        :param filename:     A name of a file to read
        :param variables:    List of variables to read from the files

        :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine will at
    # some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variable
        # AND this can cause a crash in some version/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are removed
        # from it, this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable +
                                       " in file: " + filename)

    return sds_dict, vds_dict
Example #24
    def __init__(self, data, metadata, data_retrieval_callback=None):
        """
        :param data:    The data handler (e.g. SDS instance) for the specific data type, or a numpy array of data
                        This can be a list of data handlers, or a single data handler
        :param metadata: Any associated metadata
        :param data_retrieval_callback: An optional method for retrieving data when needed
        """
        from cis.exceptions import InvalidDataTypeError
        from iris.cube import CubeMetadata
        import numpy as np

        self._data_flattened = None

        self.attributes = {}

        self.metadata = Metadata.from_CubeMetadata(metadata) if isinstance(metadata, CubeMetadata) else metadata

        if isinstance(data, np.ndarray):
            # If the data input is a numpy array we can just copy it in and ignore the data_manager
            self._data = data
            self._data_manager = None
            self._post_process()
        else:
            # If the data input wasn't a numpy array we assume it is a data reference (e.g. SDS) and we refer to
            #  this as a 'data manager' as it is responsible for getting the actual data.

            self._data = None
            # Although the data can be a list or a single item it's useful to cast it
            #  to a list here to make accessing it consistent
            self._data_manager = listify(data)

            if data_retrieval_callback is not None:
                # Use the given data retrieval method
                self.retrieve_raw_data = data_retrieval_callback
            elif type(self._data_manager[0]).__name__ in static_mappings and \
                    all([type(d).__name__ == type(self._data_manager[0]).__name__ for d in self._data_manager]):
                # Check that we recognise the data manager and that they are all the same

                # Set the retrieve_raw_data method to its mapped function
                self.retrieve_raw_data = static_mappings[type(self._data_manager[0]).__name__]
            else:
                raise InvalidDataTypeError
Example #25
def read(filename, variables=None, datadict=None):
    """
    Given a filename and a list of variable names, return a dictionary of VD data handles

    :param filename: full path to a single HDF4 file
    :param variables: A list of variables to read, if no variables are given, no variables are read
    :param datadict: A dictionary of variable name, data handle pairs to be appended to
    :return: An updated datadict with any new variables appended.
    """

    if not HDF:
        raise ImportError(
            "HDF support was not installed, please reinstall with pyhdf to read HDF files."
        )

    if datadict is None:
        datadict = {}

    variables = listify(variables)

    vs = None
    datafile = None
    try:
        datafile = HDF(filename)
        vs = datafile.vstart()

        for variable in variables:
            try:
                vd = vs.attach(variable)
                vd.detach()
                datadict[variable] = VDS(filename, variable)
            except Exception:
                # ignore variable that failed
                pass
    finally:
        if vs is not None:
            vs.end()
        if datafile is not None:
            datafile.close()

    return datadict
Example #26
def read(filename, variables=None, datadict=None):
    """
    Reads SD datasets from an HDF4 file into a dictionary.

    :param str filename: The name (with path) of the HDF file to read.
    :param iterable variables: A sequence of variable (dataset) names to read from the
     file (default None, causing all variables to be read). The names must appear exactly as in the HDF file.
    :param dict datadict: Optional dictionary to add data to, otherwise a new, empty dictionary is created
    :return: A dictionary containing data for requested variables. Missing data is replaced by NaN.
    """
    # Optional HDF import
    if not SD:
        raise ImportError(
            "HDF support was not installed, please reinstall with pyhdf to read HDF files."
        )

    # List of required variable names.
    # Open the file.
    datafile = None
    try:
        datafile = SD.SD(filename)
        sd_variables = list(datafile.datasets().keys())
    finally:
        if datafile is not None:
            datafile.end()

    if variables is None:
        requested_sd_variables = sd_variables
    else:
        requested_sd_variables = set(listify(variables)).intersection(
            set(sd_variables))

    # Create dictionary to hold data arrays for returning.
    if datadict is None:
        datadict = {}

    # Get data.
    for variable in requested_sd_variables:
        datadict[variable] = HDF_SDS(filename, variable)

    return datadict
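
The intersection with the file's own dataset names silently drops any requested variable that is missing (no error is raised at this stage), and the loss of ordering is harmless because the result only seeds a dictionary. A small sketch with an inlined stand-in for listify:

sd_variables = ['Latitude', 'Longitude', 'Optical_Depth']
variables = 'Optical_Depth'  # a single name is accepted too

requested = set([variables] if isinstance(variables, str) else variables)
requested &= set(sd_variables)
print(requested)  # {'Optical_Depth'}
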
Example #27
def read(filename, variables=None, datadict=None):
    """
    Given a filename and a list of variable names, return a dictionary of VD data handles

    :param filename: full path to a single HDF4 file
    :param variables: A list of variables to read, if no variables are given, no variables are read
    :param datadict: A dictionary of variable name, data handle pairs to be appended to
    :return: An updated datadict with any new variables appended.
    """

    if not HDF:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    if datadict is None:
        datadict = {}

    variables = listify(variables)

    vs = None
    datafile = None
    try:
        datafile = HDF(filename)
        vs = datafile.vstart()

        for variable in variables:
            try:
                vd = vs.attach(variable)
                vd.detach()
                datadict[variable] = VDS(filename, variable)
            except Exception:
                # ignore variable that failed
                pass
    finally:
        if vs is not None:
            vs.end()
        if datafile is not None:
            datafile.close()

    return datadict
Example #28
def _collapse_gridded(data, coords, kernel):
    """
    Collapse a GriddedData or GriddedDataList based on the specified grids (currently only collapsing is available)
    :param GriddedData or GriddedDataList data: The data object to aggregate
    :param list of iris.coords.Coord or str coords: The coords to collapse
    :param str or iris.analysis.Aggregator kernel: The kernel to use in the aggregation
    :return:
    """
    from cis.aggregation.collapse_kernels import aggregation_kernels, MultiKernel
    from iris.analysis import Aggregator as IrisAggregator
    from cis.aggregation.gridded_collapsor import GriddedCollapsor
    from cis import __version__
    from cis.utils import listify

    # Ensure the coords are all Coord instances
    coords = [data._get_coord(c) for c in listify(coords)]

    # The kernel can be a string or object, so catch both defaults
    if kernel is None or kernel == '':
        kernel = 'moments'

    if isinstance(kernel, six.string_types):
        kernel_inst = aggregation_kernels[kernel]
    elif isinstance(kernel, (IrisAggregator, MultiKernel)):
        kernel_inst = kernel
    else:
        raise ValueError("Invalid kernel specified: " + str(kernel))

    aggregator = GriddedCollapsor(data, coords)
    data = aggregator(kernel_inst)

    history = "Collapsed using CIS version " + __version__ + \
              "\n variables: " + str(getattr(data, "var_name", "Unknown")) + \
              "\n from files: " + str(getattr(data, "filenames", "Unknown")) + \
              "\n over coordinates: " + ", ".join(c.name() for c in coords) + \
              "\n with kernel: " + str(kernel_inst) + "."
    data.add_history(history)

    return data
Example #29
def read(filename, variables=None, datadict=None):
    """
    Reads SD datasets from an HDF4 file into a dictionary.

    :param str filename: The name (with path) of the HDF file to read.
    :param iterable variables: A sequence of variable (dataset) names to read from the
     file (default None, causing all variables to be read). The names must appear exactly as in the HDF file.
    :param dict datadict: Optional dictionary to add data to, otherwise a new, empty dictionary is created
    :return: A dictionary containing data for requested variables. Missing data is replaced by NaN.
    """
    # Optional HDF import
    if not SD:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    # List of required variable names.
    # Open the file.
    datafile = None
    try:
        datafile = SD.SD(filename)
        sd_variables = list(datafile.datasets().keys())
    finally:
        if datafile is not None:
            datafile.end()

    if variables is None:
        requested_sd_variables = sd_variables
    else:
        requested_sd_variables = set(listify(variables)).intersection(set(sd_variables))

    # Create dictionary to hold data arrays for returning.
    if datadict is None:
        datadict = {}

    # Get data.
    for variable in requested_sd_variables:
        datadict[variable] = HDF_SDS(filename, variable)

    return datadict
Example #30
    def _create_time_coord(self,
                           timestamp,
                           time_variable_name,
                           data_variables,
                           coord_axis='T',
                           standard_name='time'):
        """
        Create a time coordinate, taking into account the fact that each file may have a different timestamp.
        :param timestamp: Timestamp, or list of timestamps (one per file)
        :param time_variable_name: Name of the time variable
        :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
        :param coord_axis: Axis, default 'T'
        :param standard_name: Coord standard name, default 'time'
        :return: Coordinate
        """
        from iris.coords import AuxCoord
        from six.moves import zip_longest
        from cis.time_util import convert_time_using_time_stamp_info_to_std_time as convert, cis_standard_time_unit
        from cis.utils import concatenate

        timestamps = listify(timestamp)
        time_variables = data_variables[time_variable_name]
        time_data = []
        # Create a coordinate for each separate file to account for differing timestamps
        for file_time_var, timestamp in zip_longest(time_variables,
                                                    timestamps):
            metadata = get_metadata(file_time_var)
            if timestamp is not None:
                time_d = convert(file_time_var[:], metadata.units, timestamp)
            else:
                time_d = metadata.units.convert(file_time_var[:],
                                                cis_standard_time_unit)
            time_data.append(time_d)

        return AuxCoord(concatenate(time_data),
                        standard_name=standard_name,
                        units=cis_standard_time_unit)
Example #31
def expand_filelist(filelist):
    """
    :param filelist: A single element, or list, or comma-separated string of filenames, wildcarded filenames or
     directories
    :return: A flat list of files which exist - with no duplicates
    :raises ValueError: if any of the files in the list do not exist.
    """
    import os
    import six
    from glob import glob
    from cis.utils import OrderedSet

    if isinstance(filelist, six.string_types):
        input_list = filelist.split(',')
    else:
        input_list = listify(filelist)

    # Ensure we don't get duplicates by making file_set a set
    file_set = OrderedSet()
    for element in input_list:
        if any(wildcard in element for wildcard in ['*', '?', ']', '}']):
            filelist = glob(element)
            filelist.sort()
            for filename in filelist:
                file_set.add(filename)
        elif os.path.isdir(element):
            filelist = os.listdir(element)
            filelist.sort()
            for a_file in filelist:
                full_file = os.path.join(element, a_file)
                if os.path.isfile(full_file):
                    file_set.add(full_file)
        elif os.path.isfile(element):
            file_set.add(element)
        else:
            raise ValueError("{} is not a valid filename".format(element))
    return list(file_set)
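
OrderedSet is a CIS utility; the ordered de-duplication it provides can be sketched with dict.fromkeys, which preserves insertion order in Python 3.7+:

expanded = ['b.nc', 'a.nc', 'b.nc', 'c.nc']  # e.g. the results of several globs
unique_in_order = list(dict.fromkeys(expanded))
print(unique_in_order)  # ['b.nc', 'a.nc', 'c.nc']
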
Example #32
 def _update_aux_factories(data, *args, **kwargs):
     from cis.utils import listify
     d_list = listify(data)
     for d in d_list:
         for factory in d.aux_factories:
             factory.update(*args, **kwargs)