def parse_non_spatial_labels(
        raw_labels: Mapping[str, str],
        dims: Sequence[Hashable],
        coords: Mapping[Hashable, xr.DataArray],
        allow_slices: bool = False,
        exception_type: type = ValueError
) -> Mapping[str, Any]:
    """
    Convert textual labels of non-spatial dimensions into typed labels.

    For every dimension in *dims* that is not the dimension of one of the
    spatial x,y coordinates, the text given in *raw_labels* is interpreted
    as follows:

    * no entry for the dimension: the first value of the coordinate is used;
    * ``'current'``: the last value of the coordinate is used;
    * ``'<value>'`` or ``'<first>/<last>'``: the text is split at the first
      ``/`` and each part is converted according to the coordinate's dtype
      (floating point, integer, or datetime64; a datetime part equal to
      ``'current'`` yields the last coordinate element).

    For a two-part label the result is ``slice(first, last)`` if
    *allow_slices* is true, otherwise the mid-point between the two values
    (using floor division for integer coordinates).

    :param raw_labels: Mapping from dimension name to label text.
    :param dims: The dimension names to be processed.
    :param coords: Mapping from name to coordinate variable.
    :param allow_slices: Whether two-part labels are returned as slices.
    :param exception_type: Type of the exception raised on invalid input.
    :return: Mapping from ``str(dim)`` to the parsed label.
    :raise exception_type: If the spatial coordinates cannot be found,
        or if a label cannot be parsed.
    """
    spatial_var_names = get_dataset_xy_var_names(coords, must_exist=False)
    if spatial_var_names is None:
        raise exception_type('missing spatial coordinates')
    spatial_dims = {coords[name].dims[0] for name in spatial_var_names}

    def parse_datetime(text: str, coord_var: xr.DataArray):
        # In a range, 'current' refers to the last coordinate element.
        return coord_var[-1] if text == 'current' else pd.to_datetime(text)

    def parse_label(text, coord_var: xr.DataArray):
        if text is None:
            return coord_var.values[0]
        if text == 'current':
            return coord_var.values[-1]

        # At most two parts; text without '/' yields a single part.
        parts = tuple(text.split('/', maxsplit=1))

        dtype = coord_var.dtype
        is_integer = np.issubdtype(dtype, np.integer)
        if np.issubdtype(dtype, np.floating):
            values = tuple(float(part) for part in parts)
        elif is_integer:
            values = tuple(int(part) for part in parts)
        elif np.issubdtype(dtype, np.datetime64):
            values = tuple(parse_datetime(part, coord_var) for part in parts)
        else:
            # NOTE: if exception_type is ValueError (or a subclass of it),
            # the caller's handler below replaces this message.
            raise exception_type(
                f'unable to parse value {text!r} into a {dtype!r}'
            )

        if len(values) == 1:
            return values[0]
        first, last = values
        if allow_slices:
            return slice(first, last)
        span = last - first
        return first + (span // 2 if is_integer else span / 2)

    result = {}
    for dim in dims:
        if dim in spatial_dims:
            continue
        text = raw_labels.get(dim)
        try:
            result[str(dim)] = parse_label(text, coords[dim])
        except ValueError as e:
            raise exception_type(
                f'{text!r} is not a valid value for dimension {dim!r}'
            ) from e

    return result
def clip_dataset_by_geometry(
        dataset: xr.Dataset,
        geometry: GeometryLike,
        save_geometry_wkt: Union[str, bool] = False
) -> Optional[xr.Dataset]:
    """
    Spatially clip a dataset according to the bounding box of a given
    geometry.

    :param dataset: The dataset
    :param geometry: A geometry-like object,
        see py:function:`convert_geometry`.
    :param save_geometry_wkt: If the value is a string, the effective
        intersection geometry is stored as a Geometry WKT string in the
        global attribute named by *save_geometry_wkt*. If the value is True,
        the name "geometry_wkt" is used.
    :return: The dataset spatial subset, or None if the bounding box of the
        dataset has no or a zero-area intersection with the bounding box
        of the geometry.
    """
    spatial_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    dataset_bounds = get_dataset_bounds(dataset,
                                        xy_var_names=spatial_var_names)
    clip_geometry = intersect_geometries(dataset_bounds, geometry)
    if clip_geometry is not None:
        return _clip_dataset_by_geometry(dataset,
                                         clip_geometry,
                                         spatial_var_names,
                                         save_geometry_wkt=save_geometry_wkt)
    # Nothing of the dataset lies within the geometry.
    return None
def is_dataset_y_axis_inverted(
        dataset: Union[xr.Dataset, xr.DataArray],
        xy_var_names: Tuple[str, str] = None
) -> bool:
    """
    Test whether the y-coordinate values of *dataset* are in ascending order,
    judged by comparing the first with the last value.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x,y coordinate
        variables. If not given, the names are looked up in *dataset*.
    :return: True, if the first y value is less than the last y value.
    """
    names = xy_var_names
    if names is None:
        names = get_dataset_xy_var_names(dataset, must_exist=True)
    y_values = dataset[names[1]]
    first_y, last_y = float(y_values[0]), float(y_values[-1])
    return first_y < last_y
def get_dataset_geometry(dataset: Union[xr.Dataset, xr.DataArray],
                         xy_var_names: Tuple[str, str] = None) \
        -> shapely.geometry.base.BaseGeometry:
    """
    Get the geometry that covers the bounds of *dataset*.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x,y coordinate
        variables. If not given, the names are looked up in *dataset*.
    :return: The result of ``get_box_split_bounds_geometry()`` applied to
        the dataset bounds if *dataset* is a lon/lat dataset,
        otherwise a plain box made from the dataset bounds.
    """
    names = xy_var_names
    if names is None:
        names = get_dataset_xy_var_names(dataset, must_exist=True)
    bounds = get_dataset_bounds(dataset, xy_var_names=names)
    if not is_lon_lat_dataset(dataset, xy_var_names=names):
        return shapely.geometry.box(*bounds)
    return get_box_split_bounds_geometry(*bounds)
def is_lon_lat_dataset(dataset: Union[xr.Dataset, xr.DataArray],
                       xy_var_names: Tuple[str, str] = None) -> bool:
    """
    Test whether the x,y coordinates of *dataset* are longitude and latitude.

    This is the case if the coordinate variables are named "lon" and "lat",
    or if their ``long_name`` attributes are "longitude" and "latitude".

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x,y coordinate
        variables. If not given, the names are looked up in *dataset*.
    :return: True, if the x,y coordinates are longitude and latitude.
    """
    if xy_var_names is None:
        xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    x_name, y_name = xy_var_names

    # Well-known names are sufficient, no need to inspect attributes.
    if (x_name, y_name) == ('lon', 'lat'):
        return True

    x_coord, y_coord = dataset[x_name], dataset[y_name]
    if x_coord.attrs.get('long_name') != 'longitude':
        return False
    return y_coord.attrs.get('long_name') == 'latitude'
def get_dataset_bounds(dataset: Union[xr.Dataset, xr.DataArray],
                       xy_var_names: Tuple[str, str] = None) -> Bounds:
    """
    Get the spatial bounds of *dataset*.

    For each axis, the bounds are taken from the first and last row of the
    axis' bounds variable, if ``get_dataset_bounds_var_name()`` finds one.
    Otherwise they are derived from the first and last coordinate value,
    extended by half of the mean cell size. If such an axis has only one
    cell, its size cannot be derived and the returned extent of that axis
    is zero.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x,y coordinate
        variables. If not given, the names are looked up in *dataset*.
    :return: The tuple (x_min, y_min, x_max, y_max) of float values.
        x_min may be greater than x_max, see notes in the code.
    """
    if xy_var_names is None:
        xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    x_name, y_name = xy_var_names
    x_var, y_var = dataset.coords[x_name], dataset.coords[y_name]

    is_lon = x_name == 'lon'

    # Note, x_min is deliberately not forced to be less than x_max:
    # if x_min > x_max, the dataset is taken to cross the anti-meridian.
    x_bnds_name = get_dataset_bounds_var_name(dataset, x_name)
    if x_bnds_name:
        x_bnds_var = dataset.coords[x_bnds_name]
        x1 = x_bnds_var[0, 0]
        x2 = x_bnds_var[0, 1]
        x3 = x_bnds_var[-1, 0]
        x4 = x_bnds_var[-1, 1]
        x_min = min(x1, x2)
        x_max = max(x3, x4)
    else:
        x_min = x_var[0]
        x_max = x_var[-1]
        # A single cell has no neighbour to derive the cell size from;
        # dividing by (size - 1) would turn the bounds into NaN.
        if x_var.size > 1:
            # Longitudes that wrap around the anti-meridian span
            # 360 degrees more than the plain difference suggests.
            wrap = 0 if x_max >= x_min or not is_lon else 360
            delta = (x_max - x_min + wrap) / (x_var.size - 1)
            # Rebind instead of updating in place, so the objects taken
            # from the dataset are never modified.
            x_min = x_min - 0.5 * delta
            x_max = x_max + 0.5 * delta

    # Note, y-axis may be inverted
    y_bnds_name = get_dataset_bounds_var_name(dataset, y_name)
    if y_bnds_name:
        y_bnds_var = dataset.coords[y_bnds_name]
        y1 = y_bnds_var[0, 0]
        y2 = y_bnds_var[0, 1]
        y3 = y_bnds_var[-1, 0]
        y4 = y_bnds_var[-1, 1]
        y_min = min(y1, y2, y3, y4)
        y_max = max(y1, y2, y3, y4)
    else:
        y1 = y_var[0]
        y2 = y_var[-1]
        # See x-axis: no cell size can be derived from a single cell.
        delta = abs(y2 - y1) / (y_var.size - 1) if y_var.size > 1 else 0.0
        y_min = min(y1, y2) - 0.5 * delta
        y_max = max(y1, y2) + 0.5 * delta

    return float(x_min), float(y_min), float(x_max), float(y_max)
def verify_cube(dataset: xr.Dataset) -> List[str]:
    """
    Verify the given *dataset* for being a valid xcube dataset.

    The tool verifies that *dataset*
    * defines two spatial x,y coordinate variables, that are 1D, non-empty,
      using correct units;
    * defines a time coordinate variable, that is 1D, non-empty,
      using correct units;
    * has valid bounds variables for spatial x,y and time coordinate
      variables, if any;
    * has any data variables and that they are valid, e.g. min. 3-D, all have
      same dimensions, have at least the dimensions dim(time), dim(y), dim(x)
      in that order.

    Returns a list of issues, which is empty if *dataset* is a valid
    xcube dataset.

    :param dataset: A dataset to be verified.
    :return: List of issues or empty list.
    """
    issues = []

    xy_var_names = get_dataset_xy_var_names(dataset, must_exist=False)
    if xy_var_names is None:
        issues.append("missing spatial x,y coordinate variables")

    time_var_name = get_dataset_time_var_name(dataset, must_exist=False)
    if time_var_name is None:
        issues.append("missing time coordinate variable")

    has_xy = bool(xy_var_names)
    has_time = bool(time_var_name)

    if has_time:
        _check_time(dataset, time_var_name, issues)

    if has_xy and is_lon_lat_dataset(dataset, xy_var_names=xy_var_names):
        # Valid value ranges of longitude (axis 0) and latitude (axis 1).
        for axis, (lower, upper) in enumerate(((-180., 180.), (-90., 90.))):
            _check_lon_or_lat(dataset, xy_var_names[axis],
                              lower, upper, issues)

    if has_xy and has_time:
        _check_data_variables(dataset, xy_var_names, time_var_name, issues)

    if has_xy:
        for axis in (0, 1):
            name = xy_var_names[axis]
            # The coordinate name is passed for both name arguments.
            _check_coord_equidistance(dataset, name, name, issues)

    return issues
def rasterize_features(dataset: xr.Dataset,
                       features: Union[GeoDataFrame, GeoJSONFeatures],
                       feature_props: Sequence[Name],
                       var_props: Dict[Name, VarProps] = None,
                       in_place: bool = False) -> Optional[xr.Dataset]:
    """
    Rasterize feature properties given by *feature_props* of vector-data
    *features* as new variables into *dataset*.

    *dataset* must have two spatial 1-D coordinates, either ``lon`` and
    ``lat`` in degrees, reprojected coordinates, ``x`` and ``y``, or similar.

    *feature_props* is a sequence of names of feature properties that must
    exist in each feature of *features*.

    *features* may be passed as ``geopandas.GeoDataFrame`` or as an iterable
    of GeoJSON features.

    Using the optional *var_props*, the properties of newly created variables
    from feature properties can be specified. It is a mapping of feature
    property names to mappings of variable properties.
    Here is an example variable properties mapping::

        {
            # (str) - the variable's name, default is the feature
            # property name with spaces replaced by underscores;
            'name': 'land_class',
            # (str|np.dtype) - the variable's dtype,
            # default is np.float64;
            'dtype': np.int16,
            # (bool|int|float|np.ndarray) - the variable's fill value,
            # default is np.nan;
            'fill_value': -1,
            # (Mapping[str, Any]) - the variable's attributes,
            # default is {};
            'attrs': {},
            # (Callable[[Any], Any]) - a converter function used to convert
            # from feature property value to variable value,
            # default is float.
            'converter': int,
        }

    Features whose geometry is missing, empty, invalid, or does not
    intersect the bounds of *dataset* are ignored.

    Currently, the coordinates of the geometries in the given *features*
    must use the same CRS as the given *dataset*.

    :param dataset: The xarray dataset.
    :param features: A ``geopandas.GeoDataFrame`` instance
        or a sequence of GeoJSON features.
    :param feature_props: Sequence of names of numeric feature properties
        to be rasterized.
    :param var_props: Optional mapping of feature property name to a
        mapping of variable properties (keys "name", "dtype", "fill_value",
        "attrs", "converter") for the new variable.
    :param in_place: Whether to add new variables to *dataset*.
        If False, a new dataset is created that receives the coordinates
        and attributes of *dataset* and the new variables.
    :return: The dataset with the rasterized feature properties.
    :raise ValueError: If a name in *feature_props* is not
        found in *features*.
    """
    import geopandas

    var_props = var_props or {}

    xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    dataset_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names)
    ds_x_min, ds_y_min, ds_x_max, ds_y_max = dataset_bounds

    x_var_name, y_var_name = xy_var_names
    x_var, y_var = dataset[x_var_name], dataset[y_var_name]
    x_dim, y_dim = x_var.dims[0], y_var.dims[0]
    coords = {y_var_name: y_var, x_var_name: x_var}
    dims = (y_dim, x_dim)

    width = x_var.size
    height = y_var.size
    spatial_res = (ds_x_max - ds_x_min) / width

    if isinstance(features, geopandas.GeoDataFrame):
        geo_data_frame = features
    else:
        geo_data_frame = geopandas.GeoDataFrame.from_features(features)

    for feature_property_name in feature_props:
        if feature_property_name not in geo_data_frame:
            raise ValueError(
                f'feature property {feature_property_name!r} not found')

    # The settings of the target variables do not depend on the feature,
    # so resolve them once instead of once per feature.
    var_settings = []
    for feature_property_name in feature_props:
        var_prop_mapping = var_props.get(feature_property_name, {})
        var_settings.append((
            feature_property_name,
            var_prop_mapping.get('name',
                                 feature_property_name.replace(' ', '_')),
            var_prop_mapping.get('dtype', np.float64),
            var_prop_mapping.get('fill_value', np.nan),
            var_prop_mapping.get('attrs', {}),
            var_prop_mapping.get('converter', float),
        ))

    if not in_place:
        dataset = xr.Dataset(coords=dataset.coords, attrs=dataset.attrs)

    # Rows are addressed by position (enumerate / iloc), never by index
    # label: the index of the data frame is not necessarily 0..n-1,
    # e.g. if the frame is the result of a filter operation.
    for row, geometry in enumerate(geo_data_frame.geometry):
        if geometry is None or geometry.is_empty or not geometry.is_valid:
            continue

        # TODO (forman): allow transforming geometry into CRS of dataset here

        intersection_geometry = intersect_geometries(dataset_bounds, geometry)
        if intersection_geometry is None:
            continue

        # TODO (forman): check, we should be able to drastically improve
        #   performance by generating the mask for a dataset subset
        #   generated by clipping against geometry

        mask_data = get_geometry_mask(width, height,
                                      intersection_geometry,
                                      ds_x_min, ds_y_min,
                                      spatial_res)
        mask = xr.DataArray(mask_data, coords=coords, dims=dims)

        for (feature_property_name, var_name, var_dtype,
             var_fill_value, var_attrs, converter) in var_settings:
            feature_property_value = converter(
                geo_data_frame[feature_property_name].iloc[row])

            var_new = xr.DataArray(np.full((height, width),
                                           feature_property_value,
                                           dtype=var_dtype),
                                   coords=coords,
                                   dims=dims,
                                   attrs=var_attrs)
            if var_name not in dataset:
                # First feature contributing to this variable:
                # start from an array that only contains the fill value.
                var_old = xr.DataArray(np.full((height, width),
                                               var_fill_value,
                                               dtype=var_dtype),
                                       coords=coords,
                                       dims=dims,
                                       attrs=var_attrs)
                dataset[var_name] = var_old
            else:
                var_old = dataset[var_name]

            # Take the feature's value inside the mask,
            # keep what is already there outside of it.
            dataset[var_name] = var_new.where(mask, var_old)
            dataset[var_name].encoding.update(fill_value=var_fill_value)

    return dataset
def mask_dataset_by_geometry(
        dataset: xr.Dataset,
        geometry: GeometryLike,
        excluded_vars: Sequence[str] = None,
        no_clip: bool = False,
        save_geometry_mask: Union[str, bool] = False,
        save_geometry_wkt: Union[str, bool] = False) -> Optional[xr.Dataset]:
    """
    Mask a dataset according to the given geometry. The cells of variables
    of the returned dataset will have NaN-values where their spatial
    coordinates are not intersecting the given geometry.

    :param dataset: The dataset
    :param geometry: A geometry-like object,
        see py:function:`convert_geometry`.
    :param excluded_vars: Optional sequence of names of data variables that
        should not be masked (but still may be clipped).
    :param no_clip: If True, the function will not clip the dataset before
        masking, that is, the returned dataset will have the same dimension
        sizes as the given *dataset*.
    :param save_geometry_mask: If the value is a string, the effective
        geometry mask array is stored as a 2D data variable named by
        *save_geometry_mask*. If the value is True, the name "geometry_mask"
        is used.
    :param save_geometry_wkt: If the value is a string, the effective
        intersection geometry is stored as a Geometry WKT string in the
        global attribute named by *save_geometry_wkt*. If the value is True,
        the name "geometry_wkt" is used.
    :return: The dataset spatial subset, or None if the bounding box of the
        dataset has no or a zero-area intersection with the bounding box
        of the geometry.
    """
    geometry = convert_geometry(geometry)

    xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    full_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names)
    intersection_geometry = intersect_geometries(full_bounds, geometry)
    if intersection_geometry is None:
        return None

    if not no_clip:
        dataset = _clip_dataset_by_geometry(dataset,
                                            intersection_geometry,
                                            xy_var_names)

    # The bounds must be taken again, the dataset may have been clipped.
    ds_x_min, ds_y_min, ds_x_max, _ = get_dataset_bounds(
        dataset, xy_var_names=xy_var_names)

    x_var_name, y_var_name = xy_var_names
    x_var = dataset[x_var_name]
    y_var = dataset[y_var_name]
    width, height = x_var.size, y_var.size
    spatial_res = (ds_x_max - ds_x_min) / width

    mask = xr.DataArray(
        get_geometry_mask(width, height, intersection_geometry,
                          ds_x_min, ds_y_min, spatial_res),
        coords={y_var_name: y_var, x_var_name: x_var},
        dims=(y_var.dims[0], x_var.dims[0])
    )

    # Excluded variables are passed through, all others are masked.
    masked_vars = {
        var_name: (var
                   if excluded_vars and var_name in excluded_vars
                   else var.where(mask))
        for var_name, var in dataset.data_vars.items()
    }

    masked_dataset = xr.Dataset(masked_vars,
                                coords=dataset.coords,
                                attrs=dataset.attrs)

    _save_geometry_mask(masked_dataset, mask, save_geometry_mask)
    _save_geometry_wkt(masked_dataset, intersection_geometry,
                       save_geometry_wkt)

    return masked_dataset