def create_coords(self, filenames, variable=None):
    """
    Read the coordinates, and optionally one data variable, from the files.

    :param filenames: List of filenames to read coordinates from
    :param variable: Name of a variable to load as the data; when None only
        the coordinates are returned
    :return: An UngriddedCoordinates object when ``variable`` is None,
        otherwise an UngriddedData object for ``variable``
    """
    from cis.data_io.netcdf import read_many_files_individually
    from cis.data_io.Coord import Coord, CoordList
    from cis.exceptions import InvalidVariableError

    # (name of the variable in the files, axis label given to the Coord)
    candidates = (
        ("longitude", "x"),
        ("latitude", "y"),
        ("altitude", "z"),
        ("time", "t"),
        ("air_pressure", "p"),
    )

    dim_coords = CoordList()
    for var_name, axis in candidates:
        try:
            per_file = read_many_files_individually(filenames, var_name)[var_name]
            dim_coords.append(Coord(per_file, get_metadata(per_file[0]), axis=axis))
        except InvalidVariableError:
            # Best effort: a candidate raising InvalidVariableError is skipped
            continue

    if variable is None:
        return UngriddedCoordinates(dim_coords)

    # The size of the time coordinate is handed to the aux-coordinate helper
    all_coords = self._add_aux_coordinate(
        dim_coords,
        filenames[0],
        'DP_MID',
        dim_coords.get_coord(standard_name='time').data.size,
    )
    requested = read_many_files_individually(filenames, variable)[variable]
    return UngriddedData(requested, get_metadata(requested[0]), all_coords)
class UngriddedCube(list):
    """Cube of variables that share a set of ungridded coordinates.

    Definitely not lazy in data management. Mostly copies UngriddedData.
    Internally the list stores ``(data, metadata)`` tuples; indexing and
    iteration wrap each pair in an UngriddedData object that shares this
    cube's coordinates.

    Methods:
        __init__: Fork of cis.read_data_list to open necessary data and
            applies quality control.
        data_flattened: A 2D array where each variable is flattened.
    """

    def __init__(self, data, metadata, coords):
        """
        Store the variables and drop points where any coordinate is missing.

        :param data: A data array or list of data arrays (passed through
            ``cis.utils.listify``)
        :param metadata: Metadata object(s) paired with ``data`` in order
        :param coords: A list of Coord objects, a CoordList or a single Coord
        :raise ValueError: If ``coords`` is none of the accepted types
        """
        # Coord is imported locally (as UngriddedCoordinates.__init__ does) so
        # the isinstance check below does not depend on a module-level import.
        from cis.data_io.Coord import Coord, CoordList
        from cis.utils import listify

        def getmask(arr):
            """Return the mask of ``arr`` with NaN positions also flagged."""
            mask = np.ma.getmaskarray(arr)
            try:
                mask |= np.isnan(arr)
            except (TypeError, ValueError):
                # np.isnan raises TypeError for dtypes it does not support
                # (e.g. object arrays); such coordinates keep their mask only.
                pass
            return mask

        data = listify(data)
        metadata = listify(metadata)

        if isinstance(coords, list):
            self._coords = CoordList(coords)
        elif isinstance(coords, CoordList):
            self._coords = coords
        elif isinstance(coords, Coord):
            self._coords = CoordList([coords])
        else:
            raise ValueError("Invalid Coords type")

        # Throw out points where any coordinate is masked
        combined_mask = np.zeros(data[0].shape, dtype=bool)
        for coord in self._coords:
            combined_mask |= getmask(coord.data)
            coord.update_shape()
            coord.update_range()

        if combined_mask.any():
            keep = np.logical_not(combined_mask)
            data = [variable[keep] for variable in data]
            for coord in self._coords:
                coord.data = coord.data[keep]
                coord.update_shape()
                coord.update_range()

        super(UngriddedCube, self).__init__(zip(data, metadata))

    def __add__(self, rhs):
        """
        Concatenate with ``rhs`` when both share the very same coordinates.

        :raise NotImplementedError: If ``rhs._coords`` is not the same object
            as this cube's coordinates
        """
        if self._coords is rhs._coords:
            return super(UngriddedCube, self).__add__(rhs)
        else:
            raise NotImplementedError("Requires a single coordinate system")

    def __getitem__(self, item):
        """Return the variable at ``item`` wrapped as UngriddedData."""
        data, meta = list.__getitem__(self, item)
        return UngriddedData(data, meta, self._coords)

    def __setitem__(self, key, value):
        """Replace the data at ``key`` while keeping its existing metadata."""
        _, meta = list.__getitem__(self, key)
        list.__setitem__(self, key, (value, meta))

    def __iter__(self):
        """Yield every variable wrapped as UngriddedData."""
        for data, meta in list.__iter__(self):
            yield UngriddedData(data, meta, self._coords)

    def append(self, data, meta):
        """Append one variable as a ``(data, meta)`` pair."""
        super(UngriddedCube, self).append((data, meta))

    def extend(self, iterable):
        """Append every ``(data, meta)`` pair of ``iterable``."""
        for data, meta in iterable:
            self.append(data, meta)

    def coords(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None,
               var_name=None, dim_coords=True):
        """
        :param dim_coords: Accepted but not used by this implementation
        :return: A list of coordinates in this UngriddedCube object fitting the given criteria
        """
        return self._coords.get_coords(name_or_coord, standard_name, long_name, attributes, axis, var_name)

    def coord(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None,
              var_name=None):
        """
        :raise: CoordinateNotFoundError
        :return: A single coord given the same arguments as :meth:`coords`.
        """
        return self._coords.get_coord(name_or_coord, standard_name, long_name, attributes, axis, var_name)

    @property
    def x(self):
        """The coordinate on the X axis."""
        return self.coord(axis="X")

    @property
    def y(self):
        """The coordinate on the Y axis."""
        return self.coord(axis="Y")

    @property
    def t(self):
        """The coordinate on the T axis."""
        return self.coord(axis="T")

    @property
    def lat(self):
        """The coordinate whose standard name is ``latitude``."""
        return self.coord(standard_name="latitude")

    @property
    def lon(self):
        """The coordinate whose standard name is ``longitude``."""
        return self.coord(standard_name="longitude")

    @property
    def time(self):
        """The coordinate on the T axis (same lookup as :attr:`t`)."""
        return self.coord(axis="T")

    @property
    def data(self):
        """All variables stacked into one masked array, variable axis last."""
        data_zip = np.ma.stack([
            data for data, _ in list.__iter__(self)
        ])
        return np.moveaxis(data_zip, 0, -1)

    @property
    def data_flattened(self):
        """A 2D array with one row per point and one column per variable.

        Masked values are replaced by NaN.
        """
        # np.ma.filled fills masked arrays and returns plain ndarrays
        # unchanged, so variables without a mask no longer raise
        # AttributeError on a missing ``.filled`` method.
        data_zip = np.stack([
            np.ma.filled(data.flatten(), np.nan)
            for data, _ in list.__iter__(self)
        ])
        return data_zip.T

    @property
    def coords_flattened(self):
        """Flattened data of each standard coordinate, None where absent."""
        all_coords = self.coords().find_standard_coords()
        return [
            c.data_flattened if c is not None else None
            for c in all_coords
        ]

    def get_coordinates_points(self):
        """Returns a HyperPointView of the coordinates of all points."""
        return UngriddedHyperPointView(self.coords_flattened, None)

    def get_all_points(self):
        """Returns a HyperPointView of all points."""
        return UngriddedArrayPointView(self.coords_flattened, self.data_flattened)

    def get_non_masked_points(self):
        """Returns a HyperPointView for which the default iterator omits masked points."""
        return UngriddedArrayPointView(self.coords_flattened, self.data_flattened, non_masked_iteration=True)

    def collocated_onto(self, sample, how='', kernel=None, missing_data_for_missing_sample=True, fill_value=None,
                        var_name='', var_long_name='', var_units='', **kwargs):
        """Collocate this cube onto ``sample`` by delegating to ``sample.sampled_from``.

        All arguments are forwarded unchanged.

        :return: Whatever ``sample.sampled_from`` returns
        """
        return sample.sampled_from(self, how=how, kernel=kernel,
                                   missing_data_for_missing_sample=missing_data_for_missing_sample,
                                   fill_value=fill_value, var_name=var_name, var_long_name=var_long_name,
                                   var_units=var_units, **kwargs)

    def aggregate(self, how=None, **kwargs):
        """Aggregate the cube via ``_aggregate_ungridded``.

        :return: The aggregation result, or its only element when it has
            exactly one
        """
        from cis.data_io.ungridded_data import _aggregate_ungridded
        agg = _aggregate_ungridded(self, how, **kwargs)
        # Return the single item if there's only one (this depends on the kernel used)
        if len(agg) == 1:
            agg = agg[0]
        return agg

    def _get_coord(self, name):
        """Look ``name`` up as a coordinate, trying several interpretations.

        The name is tried in turn as name_or_coord, standard name, standard
        axis alias, variable name and axis.

        :return: The first truthy match, or None when every lookup fails
        """
        from cis.utils import standard_axes

        def _try_coord(data, coord_dict):
            """Return ``data.coord(**coord_dict)`` or None if it is not found."""
            import cis.exceptions as cis_ex
            import iris.exceptions as iris_ex
            try:
                coord = data.coord(**coord_dict)
            except (iris_ex.CoordinateNotFoundError, cis_ex.CoordinateNotFoundError):
                coord = None
            return coord

        coord = _try_coord(self, dict(name_or_coord=name)) or _try_coord(self, dict(standard_name=name)) \
            or _try_coord(self, dict(standard_name=standard_axes.get(name.upper(), None))) or \
            _try_coord(self, dict(var_name=name)) or _try_coord(self, dict(axis=name))

        return coord
class UngriddedCoordinates(CommonData):
    """
    Holds a set of ungridded coordinates with no data variable attached.
    """

    def __init__(self, coords):
        """
        Constructor

        :param coords: A list of Coord objects, a CoordList or a single Coord
        :raise ValueError: If ``coords`` is none of the accepted types
        """
        from cis.data_io.Coord import CoordList, Coord

        if isinstance(coords, list):
            coord_list = CoordList(coords)
        elif isinstance(coords, CoordList):
            coord_list = coords
        elif isinstance(coords, Coord):
            coord_list = CoordList([coords])
        else:
            raise ValueError("Invalid Coords type")
        self._coords = coord_list

        self._post_process()

        # One entry per standard coordinate; None marks an absent one
        flattened = []
        for std_coord in self._coords.find_standard_coords():
            if std_coord is None:
                flattened.append(None)
            else:
                flattened.append(std_coord.data_flattened)
        self.coords_flattened = flattened

    def _post_process(self):
        """
        Drop every point for which any coordinate is masked or NaN.

        :return:
        """
        # Accumulate the points that are missing in at least one coordinate
        missing = numpy.zeros(self._coords[0].data_flattened.shape, dtype=bool)
        for c in self._coords:
            missing |= numpy.ma.getmaskarray(c.data_flattened)
            # The NaN test is skipped for object-dtype data
            if c.data.dtype != 'object':
                missing |= numpy.isnan(c.data).flatten()

        if not missing.any():
            return

        n_points = numpy.count_nonzero(missing)
        logging.warning("Identified {n_points} point(s) which were missing values for some or all coordinates - "
                        "these points have been removed from the data.".format(n_points=n_points))
        for c in self._coords:
            masked = numpy.ma.masked_array(c.data_flattened, mask=missing)
            c.data = masked.compressed()
            c.update_shape()
            c.update_range()

    @property
    def history(self):
        """A fixed message: coordinates carry no history."""
        return "UngriddedCoordinates have no history"

    @property
    def x(self):
        """The coordinate on the X axis."""
        return self.coord(axis='X')

    @property
    def y(self):
        """The coordinate on the Y axis."""
        return self.coord(axis='Y')

    @property
    def lat(self):
        """The coordinate whose standard name is ``latitude``."""
        return self.coord(standard_name='latitude')

    @property
    def lon(self):
        """The coordinate whose standard name is ``longitude``."""
        return self.coord(standard_name='longitude')

    @property
    def time(self):
        """The coordinate whose standard name is ``time``."""
        return self.coord(standard_name='time')

    def hyper_point(self, index):
        """
        :param index: The index in the array to find the point for
        :return: A hyperpoint representing the data at that point
        """
        from cis.data_io.hyperpoint import HyperPoint

        lat = self.coord(standard_name='latitude').data.flat[index]
        lon = self.coord(standard_name='longitude').data.flat[index]
        alt = self.coord(standard_name='altitude').data.flat[index]
        time = self.coord(standard_name='time').data.flat[index]
        pressure = self.coord(standard_name='air_pressure').data.flat[index]
        return HyperPoint(lat, lon, alt, time, pressure, None)

    def as_data_frame(self, copy=True):
        """
        Convert an UngriddedCoordinates object to a Pandas DataFrame.

        :param copy: Accepted for interface compatibility; this implementation
            does not forward it to the conversion helper.
        :return: The result of ``_coords_as_data_frame`` for these coordinates.
            Note that this won't include any metadata.
        """
        return _coords_as_data_frame(self._coords)

    def coords(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None,
               dim_coords=True):
        """
        :param dim_coords: Accepted but not used by this implementation
        :return: A list of coordinates in this UngriddedCoordinates object fitting the given criteria
        """
        matches = self._coords.get_coords(name_or_coord, standard_name, long_name, attributes, axis)
        return matches

    def coord(self, name_or_coord=None, standard_name=None, long_name=None, attributes=None, axis=None):
        """
        :raise: CoordinateNotFoundError
        :return: A single coord given the same arguments as :meth:`coords`.
        """
        match = self._coords.get_coord(name_or_coord, standard_name, long_name, attributes, axis)
        return match

    def get_coordinates_points(self):
        """Returns a HyperPointView of the coordinates of all points."""
        return UngriddedHyperPointView(self.coords_flattened, None)

    def get_all_points(self):
        """Returns a HyperPointView of the points.

        :return: HyperPointView of all the data points
        """
        return UngriddedHyperPointView(self.coords_flattened, None)

    def get_non_masked_points(self):
        """Returns a HyperPointView for which the default iterator omits masked points.

        :return: HyperPointView of the data points
        """
        return UngriddedHyperPointView(self.coords_flattened, None, non_masked_iteration=True)

    @property
    def is_gridded(self):
        """Returns value indicating whether the data/coordinates are gridded (always False here)."""
        return False