def read_data_list(self, filenames, variables, product=None, aliases=None):
    """
    Read multiple data objects. Files can be either gridded or ungridded but not a mix of both.

    :param filenames: One or more filenames of the files to read
    :type filenames: string or list
    :param variables: One or more variables to read from the files
    :type variables: string or list
    :param str product: Name of data product to use (optional)
    :param aliases: List of variable aliases to put on each variable's data object as an alternative
        means of identifying them. (Optional)
    :return: A list of the data read out (either a GriddedDataList or UngriddedDataList depending on
        the type of data contained in the files)
    """
    # if filenames or variables are not lists, make them lists of 1 element
    filenames = listify(filenames)
    variables = listify(variables)
    aliases = listify(aliases) if aliases else None

    variables = self._expand_wildcards(variables, filenames, product)

    data_list = None
    for idx, variable in enumerate(variables):
        var_data = self._get_data_func(filenames, variable, product)
        var_data.filenames = filenames
        if aliases:
            try:
                var_data.alias = aliases[idx]
            except IndexError:
                raise ValueError("Number of aliases does not match number of variables")
        if data_list is None:
            data_list = GriddedDataList() if var_data.is_gridded else UngriddedDataList()
        data_list.append(var_data)
    assert data_list is not None
    return data_list
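# A minimal usage sketch for read_data_list, assuming the method lives on a
# DataReader-style class as in CIS; the file pattern, variable names and
# aliases below are hypothetical:
#
#   reader = DataReader()
#   data_list = reader.read_data_list('model_run_*.nc',
#                                     ['temperature', 'pressure'],
#                                     aliases=['T', 'P'])
#   for d in data_list:
#       print(d.alias, d.filenames)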
def _tidy_ncdf_data(var, start=None, count=None, stride=None):
    from cis.utils import listify
    from numpy.ma import MaskedArray

    start = [] if start is None else listify(start)
    count = [] if count is None else listify(count)
    stride = [] if stride is None else listify(stride)

    dim_len = var.shape
    ndim = len(dim_len)

    # Assume full read of all omitted dimensions
    while len(start) < ndim:
        start += [0]
    while len(count) < ndim:
        count += [-1]
    while len(stride) < ndim:
        stride += [1]

    # Build an explicit slice per dimension; indexing needs a tuple of slices,
    # not a generator. A negative count means "read to the end of the dimension".
    sl = tuple(slice(x0, x0 + n if n >= 0 else l, s)
               for x0, n, s, l in zip(start, count, stride, dim_len))

    data = var[sl]
    if isinstance(data, MaskedArray):
        return data
    else:
        return MaskedArray(data)
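# A quick check of the start/count/stride semantics above, using a plain
# numpy array in place of a real netCDF variable (shapes are illustrative):
import numpy as np

_var = np.arange(24).reshape(4, 6)
# Read 2 rows starting at row 1, all columns
assert _tidy_ncdf_data(_var, start=[1], count=[2]).shape == (2, 6)
# Read every other column of every row
assert _tidy_ncdf_data(_var, stride=[1, 2]).shape == (4, 3)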
def _create_fixed_value_coord(self, coord_axis, values, coord_units, points_counts, coord_name):
    """
    Create a coordinate with a fixed value

    :param coord_axis: Axis of the coordinate in the coords
    :param coord_name: The name of the coordinate
    :param coord_units: The units for the coordinate
    :param points_counts: Number of points for this coordinate, or list of sizes for multiple files
    :param values: Value of coordinate, or list of values for multiple files
    :return:
    """
    from cis.data_io.Coord import Coord

    values = listify(values)
    points_counts = listify(points_counts)
    all_points = np.array([])
    # Create separate arrays with values and sizes corresponding to each of the different input files.
    for value, points_count in zip(values, points_counts):
        file_points = np.ma.array(np.zeros(points_count) + float(value))
        all_points = np.append(all_points, file_points)
    metadata = Metadata(name=coord_name, shape=all_points.shape, units=coord_units,
                        range=(min(values), max(values)))
    return Coord(all_points, metadata, coord_axis)
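# The loop above expands per-file scalar values into one flat points array;
# a standalone sketch of the same expansion (values and counts are made up):
import numpy as np

_values, _counts = [10.0, 20.0], [3, 2]
_all_points = np.concatenate([np.full(n, float(v)) for v, n in zip(_values, _counts)])
# _all_points -> array([10., 10., 10., 20., 20.])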
def __init__(self, attributes, variables):
    """
    Initialisation

    :param attributes: dictionary of attributes and their values (or list of dictionaries if multiple files read)
    :param variables: dictionary of variable names and NetCDF Variable objects (or list of dictionaries
        if multiple files read)
    :return: nothing
    """
    self.station = False
    self.station_latitude = None
    self.latitude_variable_name = None
    self.station_longitude = None
    self.longitude_variable_name = None
    self.altitude = None
    self.altitude_variable_name = None
    self.pressure_variable_name = None
    self.time_stamp_info = None
    self.time_dimensions = None

    self._attributes = [{k.lower(): v for k, v in attrs.items()} for attrs in listify(attributes)]
    if len(variables) == 0:
        raise InvalidVariableError("No variables in the file so the type of data is unknown")
    self._variables = variables[0].keys()
    self._variable_dimensions = [{name: var.dimensions for name, var in vars.items()}
                                 for vars in listify(variables)]
    self._check_has_variables_and_attributes()

    # Carry out these checks using the attributes from the first file as a 'master'
    if self.TIME_COORDINATE_NAME.lower() in self._attributes[0]:
        self.time_variable_name = self._get_coordinate_variable_name(self.TIME_COORDINATE_NAME, "time")

        if self.LATITUDE_COORDINATE_NAME.lower() in self._attributes[0]:
            self._lat_lon_var_specified_setup()
        elif self.STATION_LATITUDE_NAME.lower() in self._attributes[0]:
            self._stationary_setup()
        else:
            raise InvalidVariableError("No attributes indicating latitude, expecting '{}' or '{}'"
                                       .format(self.STATION_LATITUDE_NAME, self.LONGITUDE_COORDINATE_NAME))
    elif self.BEST_COORDINATES_NAME.lower() in self._attributes[0]:
        self._best_coordinates_setup()
    else:
        raise InvalidVariableError(
            "No attributes indicating time variable name, expecting either '{}' or 'Coordinates'"
            .format(self.TIME_COORDINATE_NAME))

    if self.CORRECTED_PRESSURE_VAR_NAME in self._variables:
        self.pressure_variable_name = self.CORRECTED_PRESSURE_VAR_NAME
    elif self.PRESSURE_VAR_NAME in self._variables:
        self.pressure_variable_name = self.PRESSURE_VAR_NAME
    else:
        self.pressure_variable_name = None

    if self.TIME_STAMP_INFO_NAME.lower() in self._attributes[0]:
        # Not all files will have the same timestamp -> Retrieve a list of timestamps for each file.
        self.time_stamp_info = [attrs[self.TIME_STAMP_INFO_NAME.lower()] for attrs in self._attributes]

    self.time_dimensions = self._variable_dimensions[0][self.time_variable_name]
def _remove_length_one_dimensions(self, packed_data):
    from iris.util import squeeze
    from cis.data_io.gridded_data import GriddedData

    # listify returns a new list, so the result must be assigned back
    packed_data = listify(packed_data)
    new_data_list = []
    for data in packed_data:
        if data.is_gridded:
            new_data_list.append(GriddedData.make_from_cube(squeeze(data)))
        else:
            new_data_list.append(data)
    return new_data_list
def _get_MODIS_SDS_data(sds, start=None, count=None, stride=None):
    """
    Reads raw data from an SD instance.

    :param sds: The specific sds instance to read
    :param start: List of indices to start reading from each dimension
    :param count: List of number of data to read from each dimension
    :param stride: List of strides to read from each dimension
    :return: A numpy array containing the raw data with missing data replaced by NaN.
    """
    from cis.utils import create_masked_array_for_missing_data, listify
    from cis.data_io.products.MODIS import _apply_scaling_factor_MODIS
    from numpy.ma import masked_outside

    start = [] if start is None else listify(start)
    count = [] if count is None else listify(count)
    stride = [] if stride is None else listify(stride)

    _, ndim, dim_len, _, _ = sds.info()

    # Assume full read of all omitted dimensions
    while len(start) < ndim:
        start += [0]
    while len(count) < ndim:
        count += [-1]
    while len(stride) < ndim:
        stride += [1]

    # Allow lazy notation for "read all"
    count = [n if n >= 0 else l - x0 for x0, n, l in zip(start, count, dim_len)]

    data = sds.get(start, count, stride).squeeze()
    attributes = sds.attributes()

    # Apply Fill Value
    missing_value = attributes.get('_FillValue', None)
    if missing_value is not None:
        data = create_masked_array_for_missing_data(data, missing_value)

    # Check for valid_range
    valid_range = attributes.get('valid_range', None)
    if valid_range is not None:
        data = masked_outside(data, *valid_range)

    # Offsets and scaling
    add_offset = attributes.get('add_offset', 0.0)
    scale_factor = attributes.get('scale_factor', 1.0)
    data = _apply_scaling_factor_MODIS(data, scale_factor, add_offset)

    return data
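# A minimal sketch of the masking and scaling steps above, using plain numpy
# in place of a real SDS handle (the array, fill value and valid range are
# made up; MODIS conventionally unscales as scale_factor * (data - add_offset),
# which is what _apply_scaling_factor_MODIS is assumed to do):
import numpy as np
from numpy.ma import masked_outside

_raw = np.array([2.0, 4.0, -999.0, 120.0])
_data = np.ma.masked_equal(_raw, -999.0)   # fill value -> masked
_data = masked_outside(_data, 0.0, 100.0)  # valid_range masks the 120.0
_data = 0.01 * (_data - 0.0)               # scale_factor=0.01, add_offset=0.0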
def _create_time_coord(self, timestamp, time_variable_name, data_variables, coord_axis='T', standard_name='time'):
    """
    Create a time coordinate, taking into account the fact that each file may have a different timestamp.

    :param timestamp: Timestamp or list of timestamps, one for each file
    :param time_variable_name: Name of the time variable
    :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
    :param coord_axis: Axis, default 'T'
    :param standard_name: Coord standard name, default 'time'
    :return: Coordinate
    """
    from cis.data_io.Coord import Coord
    from six.moves import zip_longest

    timestamps = listify(timestamp)
    time_variables = data_variables[time_variable_name]
    time_coords = []
    # Create a coordinate for each separate file to account for differing timestamps
    for file_time_var, timestamp in zip_longest(time_variables, timestamps):
        metadata = get_metadata(file_time_var)
        metadata.standard_name = standard_name
        coord = Coord(file_time_var, metadata, coord_axis)
        coord.convert_to_std_time(timestamp)
        time_coords.append(coord)

    return Coord.from_many_coordinates(time_coords)
def __init__(self, filenames):
    from cis.utils import listify
    from glob import glob

    if isinstance(filenames, str):
        self._filenames = glob(filenames)
    else:
        self._filenames = listify(filenames)
def _create_time_coord(self, timestamp, time_variable_name, data_variables, coord_axis='T', standard_name='time'):
    """
    Create a time coordinate, taking into account the fact that each file may have a different timestamp.

    :param timestamp: Timestamp or list of timestamps, one for each file
    :param time_variable_name: Name of the time variable
    :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
    :param coord_axis: Axis, default 'T'
    :param standard_name: Coord standard name, default 'time'
    :return: Coordinate
    """
    from iris.coords import AuxCoord
    from six.moves import zip_longest
    from cis.time_util import convert_time_using_time_stamp_info_to_std_time as convert, cis_standard_time_unit
    from cis.utils import concatenate

    timestamps = listify(timestamp)
    time_variables = data_variables[time_variable_name]
    time_data = []
    # Create a coordinate for each separate file to account for differing timestamps
    for file_time_var, timestamp in zip_longest(time_variables, timestamps):
        metadata = get_metadata(file_time_var)
        if timestamp is not None:
            time_d = convert(file_time_var[:], metadata.units, timestamp)
        else:
            time_d = metadata.units.convert(file_time_var[:], cis_standard_time_unit)
        time_data.append(time_d)

    return AuxCoord(concatenate(time_data), standard_name=standard_name, units=cis_standard_time_unit)
def read_many_files(filenames, usr_variables, dim=None):
    """
    Reads a single Variable from many NetCDF files. This method uses the netCDF4 MFDataset class and so is NOT
    suitable for NetCDF4 datasets (only 'CLASSIC' netcdf).

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files. The names must appear
        exactly as in the NetCDF file.
    :param dim: The name of the dimension on which to aggregate the data. None is the default,
        which tries to aggregate over the unlimited dimension.
    :return: A list of variable instances constructed from all of the input files
    """
    from netCDF4 import MFDataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = MFDataset(filenames, aggdim=dim)
    except RuntimeError as e:
        raise IOError(e)

    data = {}
    for variable in usr_variables:
        # Get data.
        try:
            data[variable] = datafile.variables[variable]
        except KeyError:
            raise InvalidVariableError('Variable {} not found in file {}.'.format(variable, filenames))

    return data
def read(filename, usr_variables):
    """
    Reads a Variable from a NetCDF file

    :param filename: The name (with path) of the NetCDF file to read.
    :param usr_variables: A variable (dataset) name to read from the files. The name must appear exactly as in
        the NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the form
        ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A Variable instance constructed from the input file
    """
    from netCDF4 import Dataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = Dataset(filename)
    except RuntimeError as e:
        raise IOError(str(e))

    data = {}
    for full_variable in usr_variables:
        # Split the fully qualified variable (group/variable) into group and variable
        parts = full_variable.split("/")
        groups = parts[:-1]
        variable = parts[-1]
        current_group = datafile
        for group in groups:
            current_group = current_group.groups[group]
        try:
            data[full_variable] = current_group.variables[variable]
        except KeyError:
            raise InvalidVariableError(full_variable + ' could not be found in ' + filename)

    return data
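# Usage sketch for the group-qualified form (the file name is hypothetical;
# the variable path is the example given in the docstring above):
#
#   data = read('some_file.nc', 'AVHRR/Ch4CentralWavenumber')
#   var = data['AVHRR/Ch4CentralWavenumber']
#   print(var.shape, var.dimensions)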
def read_many_files_individually(filenames, usr_variables):
    """
    Read multiple Variables from many NetCDF files manually - i.e. not with MFDataset as this doesn't always work,
    in particular for NetCDF4 files.

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files. The names must appear exactly
        as in the NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the
        form ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A dictionary of lists of variable instances constructed from all of the input files with the fully
        qualified variable name as the key
    """
    from cis.utils import add_element_to_list_in_dict

    usr_variables = listify(usr_variables)

    var_data = {}
    for filename in filenames:
        var_dict = read(filename, usr_variables)
        for var in list(var_dict.keys()):
            add_element_to_list_in_dict(var_data, var, var_dict[var])

    return var_data
def __init__(self, data, metadata, coords):
    from cis.data_io.Coord import Coord, CoordList
    from cis.utils import listify

    def getmask(arr):
        mask = np.ma.getmaskarray(arr)
        try:
            mask |= np.isnan(arr)
        except ValueError:
            pass
        return mask

    data = listify(data)
    metadata = listify(metadata)

    if isinstance(coords, list):
        self._coords = CoordList(coords)
    elif isinstance(coords, CoordList):
        self._coords = coords
    elif isinstance(coords, Coord):
        self._coords = CoordList([coords])
    else:
        raise ValueError("Invalid Coords type")

    # Throw out points where any coordinate is masked
    combined_mask = np.zeros(data[0].shape, dtype=bool)
    for coord in self._coords:
        combined_mask |= getmask(coord.data)
        for bound in np.moveaxis(coord.bounds, -1, 0):
            combined_mask |= getmask(bound)
        coord.update_shape()
        coord.update_range()

    if combined_mask.any():
        keep = np.logical_not(combined_mask)
        data = [variable[keep] for variable in data]
        for coord in self._coords:
            coord.data = coord.data[keep]
            new_bounds = np.array([bound[keep] for bound in np.moveaxis(coord.bounds, -1, 0)])
            coord.bounds = np.moveaxis(new_bounds, 0, -1)
            coord.update_shape()
            coord.update_range()

    super(UngriddedCube, self).__init__(zip(data, metadata))
def read_coordinates(self, filenames, product=None):
    """
    Read the coordinates from a file

    :param filenames: The filename of the files to read
    :return: A CoordList object
    """
    # if filenames is not a list, make it a list of 1 element
    filenames = listify(filenames)

    return self._get_coords_func(filenames, product)
def ncdf_read(filenames, variable, start=None, count=None, stride=None):
    """Returns variable, concatenated over a sequence of files."""
    from cis.data_io.netcdf import read, get_metadata
    from cis.utils import concatenate, listify

    data = []
    for f in listify(filenames):
        sdata = read(f, variable)
        var = sdata[variable]
        data.append(_tidy_ncdf_data(var, start, count, stride))
    # The metadata is taken from the last file read
    metadata = get_metadata(var)

    return concatenate(data), metadata
def _create_fixed_value_coord(self, coord_axis, values, coord_units, points_counts, coord_name):
    """
    Create a coordinate with a fixed value

    :param coord_axis: Axis of the coordinate in the coords
    :param coord_name: The name of the coordinate
    :param coord_units: The units for the coordinate
    :param points_counts: Number of points for this coordinate, or list of sizes for multiple files
    :param values: Value of coordinate, or list of values for multiple files
    :return:
    """
    from iris.coords import AuxCoord

    all_points = np.array(listify(values))

    return AuxCoord(all_points, units=coord_units, standard_name=coord_name)
def __read_hdf4(filename, variables):
    """
    A wrapper method for reading raw data from hdf4 files. This returns a dictionary of io handles for each
    VD and SD data types.

    :param filename: A name of a file to read
    :param variables: List of variables to read from the files
    :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine
    # will at some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variable
        # AND this can cause a crash in some version/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are
        # removed from it, this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        # str(e) rather than "".join(e): an exception object is not a sequence of strings
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable + " in file: " + filename)

    return sds_dict, vds_dict
def _read_hdf4(filename, variables):
    """
    A wrapper method for reading raw data from hdf4 files. This returns a dictionary of io handles for each
    VD and SD data types.

    :param filename: A name of a file to read
    :param variables: List of variables to read from the files
    :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine
    # will at some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variable
        # AND this can cause a crash in some version/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are
        # removed from it, this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable + " in file: " + filename)

    return sds_dict, vds_dict
def __init__(self, data, metadata, data_retrieval_callback=None):
    """
    :param data: The data handler (e.g. SDS instance) for the specific data type, or a numpy array of data.
        This can be a list of data handlers, or a single data handler
    :param metadata: Any associated metadata
    :param data_retrieval_callback: An optional method for retrieving data when needed
    """
    from cis.exceptions import InvalidDataTypeError
    from iris.cube import CubeMetadata
    import numpy as np

    self._data_flattened = None

    self.attributes = {}

    self.metadata = Metadata.from_CubeMetadata(metadata) if isinstance(metadata, CubeMetadata) else metadata

    if isinstance(data, np.ndarray):
        # If the data input is a numpy array we can just copy it in and ignore the data_manager
        self._data = data
        self._data_manager = None
        self._post_process()
    else:
        # If the data input wasn't a numpy array we assume it is a data reference (e.g. SDS) and we refer to
        # this as a 'data manager' as it is responsible for getting the actual data.
        self._data = None
        # Although the data can be a list or a single item it's useful to cast it
        # to a list here to make accessing it consistent
        self._data_manager = listify(data)

        if data_retrieval_callback is not None:
            # Use the given data retrieval method
            self.retrieve_raw_data = data_retrieval_callback
        elif type(self._data_manager[0]).__name__ in static_mappings and \
                all([type(d).__name__ == type(self._data_manager[0]).__name__ for d in self._data_manager]):
            # Check that we recognise the data manager and that they are all the same
            # Set the retrieve_raw_data method to its mapped function name
            self.retrieve_raw_data = static_mappings[type(self._data_manager[0]).__name__]
        else:
            raise InvalidDataTypeError
def read(filename, variables=None, datadict=None):
    """
    Given a filename and a list of variable names, return a dictionary of VD data handles

    :param filename: full path to a single HDF4 file
    :param variables: A list of variables to read, if no variables are given, no variables are read
    :param datadict: A dictionary of variable name, data handle pairs to be appended to
    :return: An updated datadict with any new variables appended.
    """
    if not HDF:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    if datadict is None:
        datadict = {}

    variables = listify(variables)

    vs = None
    datafile = None
    try:
        datafile = HDF(filename)
        vs = datafile.vstart()

        for variable in variables:
            try:
                vd = vs.attach(variable)
                vd.detach()
                datadict[variable] = VDS(filename, variable)
            except:
                # ignore variable that failed
                pass
    finally:
        if vs is not None:
            vs.end()
        if datafile is not None:
            datafile.close()

    return datadict
def read(filename, variables=None, datadict=None):
    """
    Reads SD from a HDF4 file into a dictionary.

    :param str filename: The name (with path) of the HDF file to read.
    :param iterable variables: A sequence of variable (dataset) names to read from the file
        (default None, causing all variables to be read). The names must appear exactly as in the HDF file.
    :param dict datadict: Optional dictionary to add data to, otherwise a new, empty dictionary is created
    :return: A dictionary containing data for requested variables. Missing data is replaced by NaN.
    """
    # Optional HDF import
    if not SD:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    # Open the file and list its SD variable names.
    datafile = None
    try:
        datafile = SD.SD(filename)
        sd_variables = list(datafile.datasets().keys())
    finally:
        if datafile is not None:
            datafile.end()

    if variables is None:
        requested_sd_variables = sd_variables
    else:
        requested_sd_variables = set(listify(variables)).intersection(set(sd_variables))

    # Create dictionary to hold data arrays for returning.
    if datadict is None:
        datadict = {}

    # Get data.
    for variable in requested_sd_variables:
        datadict[variable] = HDF_SDS(filename, variable)

    return datadict
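# Usage sketch (the file name is hypothetical; 'Latitude' and 'Longitude' are
# typical MODIS SD dataset names). Each returned value is an HDF_SDS handle -
# the raw array is only read later, e.g. via _get_MODIS_SDS_data above:
#
#   handles = read('MYD06_L2.A2010100.hdf', ['Latitude', 'Longitude'])
#   lat_handle = handles['Latitude']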
def _collapse_gridded(data, coords, kernel):
    """
    Collapse a GriddedData or GriddedDataList over the specified coordinates (currently collapsing is the only
    gridded aggregation available).

    :param GriddedData or GriddedDataList data: The data object to aggregate
    :param list of iris.coords.Coord or str coords: The coords to collapse
    :param str or iris.analysis.Aggregator kernel: The kernel to use in the aggregation
    :return:
    """
    import six
    from cis.aggregation.collapse_kernels import aggregation_kernels, MultiKernel
    from iris.analysis import Aggregator as IrisAggregator
    from cis.aggregation.gridded_collapsor import GriddedCollapsor
    from cis import __version__
    from cis.utils import listify

    # Ensure the coords are all Coord instances
    coords = [data._get_coord(c) for c in listify(coords)]

    # The kernel can be a string or object, so catch both defaults
    if kernel is None or kernel == '':
        kernel = 'moments'

    if isinstance(kernel, six.string_types):
        kernel_inst = aggregation_kernels[kernel]
    elif isinstance(kernel, (IrisAggregator, MultiKernel)):
        kernel_inst = kernel
    else:
        raise ValueError("Invalid kernel specified: " + str(kernel))

    aggregator = GriddedCollapsor(data, coords)
    data = aggregator(kernel_inst)

    history = "Collapsed using CIS version " + __version__ + \
              "\n variables: " + str(getattr(data, "var_name", "Unknown")) + \
              "\n from files: " + str(getattr(data, "filenames", "Unknown")) + \
              "\n over coordinates: " + ", ".join(c.name() for c in coords) + \
              "\n with kernel: " + str(kernel_inst) + "."
    data.add_history(history)

    return data
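# Usage sketch, assuming `gridded_data` is a GriddedData instance; 'moments'
# is the default kernel name used above, and coordinate names follow the
# usual iris conventions:
#
#   collapsed = _collapse_gridded(gridded_data, ['longitude', 'latitude'], 'moments')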
def expand_filelist(filelist):
    """
    :param filelist: A single element, or list, or comma separated string of filenames, wildcarded filenames or
        directories
    :return: A flat list of files which exist - with no duplicates
    :raises ValueError: if any of the files in the list do not exist.
    """
    import os
    import six
    from glob import glob
    from cis.utils import OrderedSet

    if isinstance(filelist, six.string_types):
        input_list = filelist.split(',')
    else:
        input_list = listify(filelist)

    # Ensure we don't get duplicates by making file_set a set
    file_set = OrderedSet()
    for element in input_list:
        if any(wildcard in element for wildcard in ['*', '?', ']', '}']):
            filelist = glob(element)
            filelist.sort()
            for filename in filelist:
                file_set.add(filename)
        elif os.path.isdir(element):
            filelist = os.listdir(element)
            filelist.sort()
            for a_file in filelist:
                full_file = os.path.join(element, a_file)
                if os.path.isfile(full_file):
                    file_set.add(full_file)
        elif os.path.isfile(element):
            file_set.add(element)
        else:
            raise ValueError("{} is not a valid filename".format(element))

    return list(file_set)
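# Usage sketch (the paths are illustrative and assumed to exist on disk):
#
#   expand_filelist('a.nc,b.nc')                # comma separated string
#   expand_filelist(['data/*.nc', '/some/dir']) # wildcards and directories mixed
#   expand_filelist('a.nc,a.nc')                # duplicates dropped -> ['a.nc']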
def _update_aux_factories(data, *args, **kwargs):
    from cis.utils import listify

    d_list = listify(data)
    for d in d_list:
        for factory in d.aux_factories:
            factory.update(*args, **kwargs)