def unsqueeze_dataset(ds: xr.Dataset, dim: str, coord: int = 0, pos: int = 0) -> xr.Dataset:
    """Insert a new dimension into every data variable of the dataset.

    Applies ``unsqueeze_data_array`` to each variable, forwarding ``dim``,
    ``pos`` and ``coord`` unchanged and keeping dataset attributes.

    :param ds: dataset whose variables receive the extra dimension
    :param dim: name of the dimension to insert (passed through to
        ``unsqueeze_data_array``)
    :param coord: coordinate value for the new dimension (passed through)
    :param pos: position argument for the insertion (passed through)
    :return: the resulting dataset
    """
    return ds.apply(unsqueeze_data_array,
                    dim=dim, pos=pos, keep_attrs=True, coord=coord)
def _resample_dataset(ds_master: xr.Dataset, ds_replica: xr.Dataset,
                      method_us: int, method_ds: int,
                      monitor: Monitor) -> xr.Dataset:
    """
    Resample the replica dataset onto the grid of the master dataset.

    Spatially resamples every variable of the replica dataset. Works only
    when both datasets carry (time, lat, lon) dimensions. Dataset attributes
    are not propagated due to the currently undecided CDM attribute set.

    :param ds_master: xr.Dataset whose lat/lon coordinates define the target grid
    :param ds_replica: xr.Dataset to be resampled onto the master's grid
    :param method_us: Interpolation method for upsampling, see resampling.py
    :param method_ds: Interpolation method for downsampling, see resampling.py
    :param monitor: a progress monitor.
    :return: xr.Dataset The resampled replica dataset
    """
    # Intersect the master and replica grids; the resulting bounds fall on
    # pixel boundaries of both spatial dimensions for both datasets.
    lat_min, lat_max = _find_intersection(ds_master['lat'].values,
                                          ds_replica['lat'].values,
                                          global_bounds=(-90, 90))
    lon_min, lon_max = _find_intersection(ds_master['lon'].values,
                                          ds_replica['lon'].values,
                                          global_bounds=(-180, 180))

    # Subset manually rather than via the subset operation: that operation
    # may, by design, yield datasets crossing the anti-meridian, and such a
    # disjoint dataset cannot be handled by the current resampling methods.
    lat_slice = slice(lat_min, lat_max)
    lon_slice = slice(lon_min, lon_max)
    lon = ds_master['lon'].sel(lon=lon_slice)
    lat = ds_master['lat'].sel(lat=lat_slice)
    ds_replica = ds_replica.sel(lon=lon_slice, lat=lat_slice)

    # Short-circuit when the spatial definitions already agree.
    if _grids_equal(ds_master, ds_replica):
        return ds_replica

    with monitor.starting("coregister dataset", len(ds_replica.data_vars)):
        retset = ds_replica.apply(_resample_array,
                                  keep_attrs=True,
                                  lon=lon,
                                  lat=lat,
                                  method_us=method_us,
                                  method_ds=method_ds,
                                  parent_monitor=monitor)
        return adjust_spatial_attrs(retset)
def _resample_dataset(ds_master: xr.Dataset, ds_replica: xr.Dataset,
                      method_us: int, method_ds: int,
                      monitor: Monitor) -> xr.Dataset:
    """
    Resample the replica dataset onto the master dataset's grid.

    Resamples all variables in the replica dataset spatially. Both datasets
    must have (time, lat, lon) dimensions. Dataset attributes are not
    propagated due to the currently undecided CDM attribute set.

    NOTE(review): this source contains more than one ``_resample_dataset``
    definition; in a single module the later definition shadows the earlier
    one — confirm which variant callers are meant to reach.

    :param ds_master: xr.Dataset whose lat/lon coordinates define the target grid
    :param ds_replica: xr.Dataset to be resampled onto the master's grid
    :param method_us: Interpolation method for upsampling, see resampling.py
    :param method_ds: Interpolation method for downsampling, see resampling.py
    :param monitor: a progress monitor.
    :return: xr.Dataset The resampled replica dataset
    """
    # Bounds of the grid intersection; they land on pixel boundaries of
    # both spatial dimensions for both datasets.
    master_lats = ds_master['lat'].values
    master_lons = ds_master['lon'].values
    lat_min, lat_max = _find_intersection(master_lats,
                                          ds_replica['lat'].values,
                                          global_bounds=(-90, 90))
    lon_min, lon_max = _find_intersection(master_lons,
                                          ds_replica['lon'].values,
                                          global_bounds=(-180, 180))

    # We deliberately avoid the subset operation here: it may produce
    # datasets crossing the anti-meridian, which the current resampling
    # methods cannot process.
    lat_slice = slice(lat_min, lat_max)
    lon_slice = slice(lon_min, lon_max)
    lon = ds_master['lon'].sel(lon=lon_slice)
    lat = ds_master['lat'].sel(lat=lat_slice)
    ds_replica = ds_replica.sel(lon=lon_slice, lat=lat_slice)

    # No-op when both datasets already share the same spatial definition.
    if _grids_equal(ds_master, ds_replica):
        return ds_replica

    with monitor.starting("coregister dataset", len(ds_replica.data_vars)):
        resample_kwargs = {'lon': lon,
                           'lat': lat,
                           'method_us': method_us,
                           'method_ds': method_ds,
                           'parent_monitor': monitor}
        resampled = ds_replica.apply(_resample_array, keep_attrs=True,
                                     **resample_kwargs)
        return adjust_spatial_attrs(resampled)
def _resample_dataset(ds_master: xr.Dataset, ds_slave: xr.Dataset,
                      method_us: int, method_ds: int) -> xr.Dataset:
    """
    Resample slave onto the grid of the master.

    This does spatial resampling of the whole dataset, i.e., all variables
    in the slave dataset. This method works only if both datasets have
    (time, lat, lon) dimensions. Note that dataset attributes are not
    propagated due to currently undecided CDM attributes' set.

    NOTE(review): this source defines ``_resample_dataset`` more than once
    with differing signatures; if they live in one module the later
    definition shadows this one — confirm which is intended.

    :param ds_master: xr.Dataset whose lat/lon coordinates are used as the
        resampling grid
    :param ds_slave: xr.Dataset that will be resampled on the master's grid
    :param method_us: Interpolation method for upsampling, see resampling.py
    :param method_ds: Interpolation method for downsampling, see resampling.py
    :return: xr.Dataset The resampled slave dataset
    """
    # (removed dead commented-out code that computed unused dim-key lists)
    kwargs = {'lon': ds_master['lon'],
              'lat': ds_master['lat'],
              'method_us': method_us,
              'method_ds': method_ds}
    return ds_slave.apply(_resample_array, **kwargs)
def zeros_like(ds: xr.Dataset, override_chunks=None):
    """Performant implementation of zeros_like.

    ``xr.zeros_like(ds).chunk(chunks)`` is very slow for datasets with many
    chunks, so this delegates to ``_zeros_like_dataarray`` per variable
    instead, keeping attributes.

    Parameters
    ----------
    ds : xr.Dataset
        Input dataset with dask-backed data variables.
    override_chunks : dict
        Dimension chunk-size pairs indicating any dimensions one would like
        to override the original chunk sizes along. For any dimensions that
        are not present, zeros_like will use the chunk size along that
        dimension for each variable in the input Dataset.

    Returns
    -------
    xr.Dataset
    """
    return ds.apply(_zeros_like_dataarray,
                    keep_attrs=True,
                    override_chunks=override_chunks)
def cast_back(data: xarray.Dataset, measurements: Iterable[Dict[str, Any]]) -> xarray.Dataset:
    """
    Cast calculated statistic `Dataset` into intended data types.

    When going through intermediate representation as floats, restore
    `nodata` values in place of `NaN`s.

    :param data: dataset whose variables are to be cast back
    :param measurements: measurement descriptors; each mapping must carry
        'name' and 'dtype' keys and may carry a 'nodata' key
    :return: dataset with each variable cast to its declared dtype
    """
    assert isinstance(data, xarray.Dataset)
    measurements = {measurement['name']: measurement
                    for measurement in measurements}

    # Every data variable must have a matching measurement definition.
    # (Idiom fix: compare the variable-name set directly rather than
    # building an intermediate list first.)
    assert set(data.data_vars) == set(measurements.keys())

    def cast(da):
        """ Cast `DataArray` into intended type. """
        output_measurement = measurements[da.name]
        expected_dtype = np.dtype(output_measurement['dtype'])
        actual_dtype = da.dtype

        if actual_dtype.kind != 'f' or 'nodata' not in output_measurement:
            # Did not go through intermediate float representation,
            # or nodata is unspecified: plain cast (no-op when the dtype
            # already matches).
            if expected_dtype == actual_dtype:
                return da
            return da.astype(expected_dtype)

        # Replace NaNs with the declared nodata value after casting.
        nans = np.isnan(da.values)
        clone = da.astype(expected_dtype)
        clone.values[nans] = output_measurement['nodata']
        return clone

    return data.apply(cast, keep_attrs=True)